gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
85 static rtx legitimize_dllimport_symbol (rtx, bool);
86 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
87 static rtx legitimize_pe_coff_symbol (rtx, bool);
89 #ifndef CHECK_STACK_LIMIT
90 #define CHECK_STACK_LIMIT (-1)
91 #endif
93 /* Return index of given mode in mult and division cost tables. */
94 #define MODE_INDEX(mode) \
95 ((mode) == QImode ? 0 \
96 : (mode) == HImode ? 1 \
97 : (mode) == SImode ? 2 \
98 : (mode) == DImode ? 3 \
99 : 4)
101 /* Processor costs (relative to an add) */
102 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
103 #define COSTS_N_BYTES(N) ((N) * 2)
105 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
107 static stringop_algs ix86_size_memcpy[2] = {
108 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
109 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
110 static stringop_algs ix86_size_memset[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
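/* How the stringop_algs tables here and below are read (a rough sketch based
   on struct stringop_algs in i386.h; treat the details as an assumption
   rather than a definition): the first member is the algorithm used when the
   block size is not known at compile time, followed by {max, alg, noalign}
   entries that select an algorithm for known sizes of up to "max" bytes,
   with max == -1 terminating the list and covering any remaining size.  The
   two array elements give the 32-bit and 64-bit variants of each table.  */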
114 const
115 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
116 COSTS_N_BYTES (2), /* cost of an add instruction */
117 COSTS_N_BYTES (3), /* cost of a lea instruction */
118 COSTS_N_BYTES (2), /* variable shift costs */
119 COSTS_N_BYTES (3), /* constant shift costs */
120 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
121 COSTS_N_BYTES (3), /* HI */
122 COSTS_N_BYTES (3), /* SI */
123 COSTS_N_BYTES (3), /* DI */
124 COSTS_N_BYTES (5)}, /* other */
125 0, /* cost of multiply per each bit set */
126 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
127 COSTS_N_BYTES (3), /* HI */
128 COSTS_N_BYTES (3), /* SI */
129 COSTS_N_BYTES (3), /* DI */
130 COSTS_N_BYTES (5)}, /* other */
131 COSTS_N_BYTES (3), /* cost of movsx */
132 COSTS_N_BYTES (3), /* cost of movzx */
133 0, /* "large" insn */
134 2, /* MOVE_RATIO */
135 2, /* cost for loading QImode using movzbl */
136 {2, 2, 2}, /* cost of loading integer registers
137 in QImode, HImode and SImode.
138 Relative to reg-reg move (2). */
139 {2, 2, 2}, /* cost of storing integer registers */
140 2, /* cost of reg,reg fld/fst */
141 {2, 2, 2}, /* cost of loading fp registers
142 in SFmode, DFmode and XFmode */
143 {2, 2, 2}, /* cost of storing fp registers
144 in SFmode, DFmode and XFmode */
145 3, /* cost of moving MMX register */
146 {3, 3}, /* cost of loading MMX registers
147 in SImode and DImode */
148 {3, 3}, /* cost of storing MMX registers
149 in SImode and DImode */
150 3, /* cost of moving SSE register */
151 {3, 3, 3}, /* cost of loading SSE registers
152 in SImode, DImode and TImode */
153 {3, 3, 3}, /* cost of storing SSE registers
154 in SImode, DImode and TImode */
155 3, /* MMX or SSE register to integer */
156 0, /* size of l1 cache */
157 0, /* size of l2 cache */
158 0, /* size of prefetch block */
159 0, /* number of parallel prefetches */
160 2, /* Branch cost */
161 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
162 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
163 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
164 COSTS_N_BYTES (2), /* cost of FABS instruction. */
165 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
166 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
167 ix86_size_memcpy,
168 ix86_size_memset,
169 1, /* scalar_stmt_cost. */
170 1, /* scalar load_cost. */
171 1, /* scalar_store_cost. */
172 1, /* vec_stmt_cost. */
173 1, /* vec_to_scalar_cost. */
174 1, /* scalar_to_vec_cost. */
175 1, /* vec_align_load_cost. */
176 1, /* vec_unalign_load_cost. */
177 1, /* vec_store_cost. */
178 1, /* cond_taken_branch_cost. */
179 1, /* cond_not_taken_branch_cost. */
180 };
182 /* Processor costs (relative to an add) */
183 static stringop_algs i386_memcpy[2] = {
184 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
185 DUMMY_STRINGOP_ALGS};
186 static stringop_algs i386_memset[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
190 static const
191 struct processor_costs i386_cost = { /* 386 specific costs */
192 COSTS_N_INSNS (1), /* cost of an add instruction */
193 COSTS_N_INSNS (1), /* cost of a lea instruction */
194 COSTS_N_INSNS (3), /* variable shift costs */
195 COSTS_N_INSNS (2), /* constant shift costs */
196 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
197 COSTS_N_INSNS (6), /* HI */
198 COSTS_N_INSNS (6), /* SI */
199 COSTS_N_INSNS (6), /* DI */
200 COSTS_N_INSNS (6)}, /* other */
201 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
202 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
203 COSTS_N_INSNS (23), /* HI */
204 COSTS_N_INSNS (23), /* SI */
205 COSTS_N_INSNS (23), /* DI */
206 COSTS_N_INSNS (23)}, /* other */
207 COSTS_N_INSNS (3), /* cost of movsx */
208 COSTS_N_INSNS (2), /* cost of movzx */
209 15, /* "large" insn */
210 3, /* MOVE_RATIO */
211 4, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {8, 8, 8}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {8, 8, 8}, /* cost of storing fp registers
220 in SFmode, DFmode and XFmode */
221 2, /* cost of moving MMX register */
222 {4, 8}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {4, 8}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {4, 8, 16}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {4, 8, 16}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
231 3, /* MMX or SSE register to integer */
232 0, /* size of l1 cache */
233 0, /* size of l2 cache */
234 0, /* size of prefetch block */
235 0, /* number of parallel prefetches */
236 1, /* Branch cost */
237 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
238 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
239 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
240 COSTS_N_INSNS (22), /* cost of FABS instruction. */
241 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
242 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
243 i386_memcpy,
244 i386_memset,
245 1, /* scalar_stmt_cost. */
246 1, /* scalar load_cost. */
247 1, /* scalar_store_cost. */
248 1, /* vec_stmt_cost. */
249 1, /* vec_to_scalar_cost. */
250 1, /* scalar_to_vec_cost. */
251 1, /* vec_align_load_cost. */
252 2, /* vec_unalign_load_cost. */
253 1, /* vec_store_cost. */
254 3, /* cond_taken_branch_cost. */
255 1, /* cond_not_taken_branch_cost. */
256 };
258 static stringop_algs i486_memcpy[2] = {
259 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
260 DUMMY_STRINGOP_ALGS};
261 static stringop_algs i486_memset[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
265 static const
266 struct processor_costs i486_cost = { /* 486 specific costs */
267 COSTS_N_INSNS (1), /* cost of an add instruction */
268 COSTS_N_INSNS (1), /* cost of a lea instruction */
269 COSTS_N_INSNS (3), /* variable shift costs */
270 COSTS_N_INSNS (2), /* constant shift costs */
271 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
272 COSTS_N_INSNS (12), /* HI */
273 COSTS_N_INSNS (12), /* SI */
274 COSTS_N_INSNS (12), /* DI */
275 COSTS_N_INSNS (12)}, /* other */
276 1, /* cost of multiply per each bit set */
277 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
278 COSTS_N_INSNS (40), /* HI */
279 COSTS_N_INSNS (40), /* SI */
280 COSTS_N_INSNS (40), /* DI */
281 COSTS_N_INSNS (40)}, /* other */
282 COSTS_N_INSNS (3), /* cost of movsx */
283 COSTS_N_INSNS (2), /* cost of movzx */
284 15, /* "large" insn */
285 3, /* MOVE_RATIO */
286 4, /* cost for loading QImode using movzbl */
287 {2, 4, 2}, /* cost of loading integer registers
288 in QImode, HImode and SImode.
289 Relative to reg-reg move (2). */
290 {2, 4, 2}, /* cost of storing integer registers */
291 2, /* cost of reg,reg fld/fst */
292 {8, 8, 8}, /* cost of loading fp registers
293 in SFmode, DFmode and XFmode */
294 {8, 8, 8}, /* cost of storing fp registers
295 in SFmode, DFmode and XFmode */
296 2, /* cost of moving MMX register */
297 {4, 8}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {4, 8}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {4, 8, 16}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {4, 8, 16}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 3, /* MMX or SSE register to integer */
307 4, /* size of l1 cache. 486 has 8kB cache
308 shared for code and data, so 4kB is
309 not really precise. */
310 4, /* size of l2 cache */
311 0, /* size of prefetch block */
312 0, /* number of parallel prefetches */
313 1, /* Branch cost */
314 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
315 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
316 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
317 COSTS_N_INSNS (3), /* cost of FABS instruction. */
318 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
319 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
320 i486_memcpy,
321 i486_memset,
322 1, /* scalar_stmt_cost. */
323 1, /* scalar load_cost. */
324 1, /* scalar_store_cost. */
325 1, /* vec_stmt_cost. */
326 1, /* vec_to_scalar_cost. */
327 1, /* scalar_to_vec_cost. */
328 1, /* vec_align_load_cost. */
329 2, /* vec_unalign_load_cost. */
330 1, /* vec_store_cost. */
331 3, /* cond_taken_branch_cost. */
332 1, /* cond_not_taken_branch_cost. */
333 };
335 static stringop_algs pentium_memcpy[2] = {
336 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
337 DUMMY_STRINGOP_ALGS};
338 static stringop_algs pentium_memset[2] = {
339 {libcall, {{-1, rep_prefix_4_byte, false}}},
340 DUMMY_STRINGOP_ALGS};
342 static const
343 struct processor_costs pentium_cost = {
344 COSTS_N_INSNS (1), /* cost of an add instruction */
345 COSTS_N_INSNS (1), /* cost of a lea instruction */
346 COSTS_N_INSNS (4), /* variable shift costs */
347 COSTS_N_INSNS (1), /* constant shift costs */
348 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
349 COSTS_N_INSNS (11), /* HI */
350 COSTS_N_INSNS (11), /* SI */
351 COSTS_N_INSNS (11), /* DI */
352 COSTS_N_INSNS (11)}, /* other */
353 0, /* cost of multiply per each bit set */
354 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
355 COSTS_N_INSNS (25), /* HI */
356 COSTS_N_INSNS (25), /* SI */
357 COSTS_N_INSNS (25), /* DI */
358 COSTS_N_INSNS (25)}, /* other */
359 COSTS_N_INSNS (3), /* cost of movsx */
360 COSTS_N_INSNS (2), /* cost of movzx */
361 8, /* "large" insn */
362 6, /* MOVE_RATIO */
363 6, /* cost for loading QImode using movzbl */
364 {2, 4, 2}, /* cost of loading integer registers
365 in QImode, HImode and SImode.
366 Relative to reg-reg move (2). */
367 {2, 4, 2}, /* cost of storing integer registers */
368 2, /* cost of reg,reg fld/fst */
369 {2, 2, 6}, /* cost of loading fp registers
370 in SFmode, DFmode and XFmode */
371 {4, 4, 6}, /* cost of storing fp registers
372 in SFmode, DFmode and XFmode */
373 8, /* cost of moving MMX register */
374 {8, 8}, /* cost of loading MMX registers
375 in SImode and DImode */
376 {8, 8}, /* cost of storing MMX registers
377 in SImode and DImode */
378 2, /* cost of moving SSE register */
379 {4, 8, 16}, /* cost of loading SSE registers
380 in SImode, DImode and TImode */
381 {4, 8, 16}, /* cost of storing SSE registers
382 in SImode, DImode and TImode */
383 3, /* MMX or SSE register to integer */
384 8, /* size of l1 cache. */
385 8, /* size of l2 cache */
386 0, /* size of prefetch block */
387 0, /* number of parallel prefetches */
388 2, /* Branch cost */
389 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
390 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
391 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
392 COSTS_N_INSNS (1), /* cost of FABS instruction. */
393 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
394 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
395 pentium_memcpy,
396 pentium_memset,
397 1, /* scalar_stmt_cost. */
398 1, /* scalar load_cost. */
399 1, /* scalar_store_cost. */
400 1, /* vec_stmt_cost. */
401 1, /* vec_to_scalar_cost. */
402 1, /* scalar_to_vec_cost. */
403 1, /* vec_align_load_cost. */
404 2, /* vec_unalign_load_cost. */
405 1, /* vec_store_cost. */
406 3, /* cond_taken_branch_cost. */
407 1, /* cond_not_taken_branch_cost. */
408 };
410 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
411 (we ensure the alignment).  For small blocks an inline loop is still a
412 noticeable win; for bigger blocks either rep movsl or rep movsb is the
413 way to go.  Rep movsb apparently has a more expensive startup time in the CPU,
414 but after 4K the difference is down in the noise. */
415 static stringop_algs pentiumpro_memcpy[2] = {
416 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
417 {8192, rep_prefix_4_byte, false},
418 {-1, rep_prefix_1_byte, false}}},
419 DUMMY_STRINGOP_ALGS};
420 static stringop_algs pentiumpro_memset[2] = {
421 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
422 {8192, rep_prefix_4_byte, false},
423 {-1, libcall, false}}},
424 DUMMY_STRINGOP_ALGS};
425 static const
426 struct processor_costs pentiumpro_cost = {
427 COSTS_N_INSNS (1), /* cost of an add instruction */
428 COSTS_N_INSNS (1), /* cost of a lea instruction */
429 COSTS_N_INSNS (1), /* variable shift costs */
430 COSTS_N_INSNS (1), /* constant shift costs */
431 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
432 COSTS_N_INSNS (4), /* HI */
433 COSTS_N_INSNS (4), /* SI */
434 COSTS_N_INSNS (4), /* DI */
435 COSTS_N_INSNS (4)}, /* other */
436 0, /* cost of multiply per each bit set */
437 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
438 COSTS_N_INSNS (17), /* HI */
439 COSTS_N_INSNS (17), /* SI */
440 COSTS_N_INSNS (17), /* DI */
441 COSTS_N_INSNS (17)}, /* other */
442 COSTS_N_INSNS (1), /* cost of movsx */
443 COSTS_N_INSNS (1), /* cost of movzx */
444 8, /* "large" insn */
445 6, /* MOVE_RATIO */
446 2, /* cost for loading QImode using movzbl */
447 {4, 4, 4}, /* cost of loading integer registers
448 in QImode, HImode and SImode.
449 Relative to reg-reg move (2). */
450 {2, 2, 2}, /* cost of storing integer registers */
451 2, /* cost of reg,reg fld/fst */
452 {2, 2, 6}, /* cost of loading fp registers
453 in SFmode, DFmode and XFmode */
454 {4, 4, 6}, /* cost of storing fp registers
455 in SFmode, DFmode and XFmode */
456 2, /* cost of moving MMX register */
457 {2, 2}, /* cost of loading MMX registers
458 in SImode and DImode */
459 {2, 2}, /* cost of storing MMX registers
460 in SImode and DImode */
461 2, /* cost of moving SSE register */
462 {2, 2, 8}, /* cost of loading SSE registers
463 in SImode, DImode and TImode */
464 {2, 2, 8}, /* cost of storing SSE registers
465 in SImode, DImode and TImode */
466 3, /* MMX or SSE register to integer */
467 8, /* size of l1 cache. */
468 256, /* size of l2 cache */
469 32, /* size of prefetch block */
470 6, /* number of parallel prefetches */
471 2, /* Branch cost */
472 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
473 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
474 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
475 COSTS_N_INSNS (2), /* cost of FABS instruction. */
476 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
477 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
478 pentiumpro_memcpy,
479 pentiumpro_memset,
480 1, /* scalar_stmt_cost. */
481 1, /* scalar load_cost. */
482 1, /* scalar_store_cost. */
483 1, /* vec_stmt_cost. */
484 1, /* vec_to_scalar_cost. */
485 1, /* scalar_to_vec_cost. */
486 1, /* vec_align_load_cost. */
487 2, /* vec_unalign_load_cost. */
488 1, /* vec_store_cost. */
489 3, /* cond_taken_branch_cost. */
490 1, /* cond_not_taken_branch_cost. */
491 };
493 static stringop_algs geode_memcpy[2] = {
494 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
495 DUMMY_STRINGOP_ALGS};
496 static stringop_algs geode_memset[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static const
500 struct processor_costs geode_cost = {
501 COSTS_N_INSNS (1), /* cost of an add instruction */
502 COSTS_N_INSNS (1), /* cost of a lea instruction */
503 COSTS_N_INSNS (2), /* variable shift costs */
504 COSTS_N_INSNS (1), /* constant shift costs */
505 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
506 COSTS_N_INSNS (4), /* HI */
507 COSTS_N_INSNS (7), /* SI */
508 COSTS_N_INSNS (7), /* DI */
509 COSTS_N_INSNS (7)}, /* other */
510 0, /* cost of multiply per each bit set */
511 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
512 COSTS_N_INSNS (23), /* HI */
513 COSTS_N_INSNS (39), /* SI */
514 COSTS_N_INSNS (39), /* DI */
515 COSTS_N_INSNS (39)}, /* other */
516 COSTS_N_INSNS (1), /* cost of movsx */
517 COSTS_N_INSNS (1), /* cost of movzx */
518 8, /* "large" insn */
519 4, /* MOVE_RATIO */
520 1, /* cost for loading QImode using movzbl */
521 {1, 1, 1}, /* cost of loading integer registers
522 in QImode, HImode and SImode.
523 Relative to reg-reg move (2). */
524 {1, 1, 1}, /* cost of storing integer registers */
525 1, /* cost of reg,reg fld/fst */
526 {1, 1, 1}, /* cost of loading fp registers
527 in SFmode, DFmode and XFmode */
528 {4, 6, 6}, /* cost of storing fp registers
529 in SFmode, DFmode and XFmode */
531 1, /* cost of moving MMX register */
532 {1, 1}, /* cost of loading MMX registers
533 in SImode and DImode */
534 {1, 1}, /* cost of storing MMX registers
535 in SImode and DImode */
536 1, /* cost of moving SSE register */
537 {1, 1, 1}, /* cost of loading SSE registers
538 in SImode, DImode and TImode */
539 {1, 1, 1}, /* cost of storing SSE registers
540 in SImode, DImode and TImode */
541 1, /* MMX or SSE register to integer */
542 64, /* size of l1 cache. */
543 128, /* size of l2 cache. */
544 32, /* size of prefetch block */
545 1, /* number of parallel prefetches */
546 1, /* Branch cost */
547 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (1), /* cost of FABS instruction. */
551 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
553 geode_memcpy,
554 geode_memset,
555 1, /* scalar_stmt_cost. */
556 1, /* scalar load_cost. */
557 1, /* scalar_store_cost. */
558 1, /* vec_stmt_cost. */
559 1, /* vec_to_scalar_cost. */
560 1, /* scalar_to_vec_cost. */
561 1, /* vec_align_load_cost. */
562 2, /* vec_unalign_load_cost. */
563 1, /* vec_store_cost. */
564 3, /* cond_taken_branch_cost. */
565 1, /* cond_not_taken_branch_cost. */
566 };
568 static stringop_algs k6_memcpy[2] = {
569 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
570 DUMMY_STRINGOP_ALGS};
571 static stringop_algs k6_memset[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static const
575 struct processor_costs k6_cost = {
576 COSTS_N_INSNS (1), /* cost of an add instruction */
577 COSTS_N_INSNS (2), /* cost of a lea instruction */
578 COSTS_N_INSNS (1), /* variable shift costs */
579 COSTS_N_INSNS (1), /* constant shift costs */
580 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
581 COSTS_N_INSNS (3), /* HI */
582 COSTS_N_INSNS (3), /* SI */
583 COSTS_N_INSNS (3), /* DI */
584 COSTS_N_INSNS (3)}, /* other */
585 0, /* cost of multiply per each bit set */
586 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
587 COSTS_N_INSNS (18), /* HI */
588 COSTS_N_INSNS (18), /* SI */
589 COSTS_N_INSNS (18), /* DI */
590 COSTS_N_INSNS (18)}, /* other */
591 COSTS_N_INSNS (2), /* cost of movsx */
592 COSTS_N_INSNS (2), /* cost of movzx */
593 8, /* "large" insn */
594 4, /* MOVE_RATIO */
595 3, /* cost for loading QImode using movzbl */
596 {4, 5, 4}, /* cost of loading integer registers
597 in QImode, HImode and SImode.
598 Relative to reg-reg move (2). */
599 {2, 3, 2}, /* cost of storing integer registers */
600 4, /* cost of reg,reg fld/fst */
601 {6, 6, 6}, /* cost of loading fp registers
602 in SFmode, DFmode and XFmode */
603 {4, 4, 4}, /* cost of storing fp registers
604 in SFmode, DFmode and XFmode */
605 2, /* cost of moving MMX register */
606 {2, 2}, /* cost of loading MMX registers
607 in SImode and DImode */
608 {2, 2}, /* cost of storing MMX registers
609 in SImode and DImode */
610 2, /* cost of moving SSE register */
611 {2, 2, 8}, /* cost of loading SSE registers
612 in SImode, DImode and TImode */
613 {2, 2, 8}, /* cost of storing SSE registers
614 in SImode, DImode and TImode */
615 6, /* MMX or SSE register to integer */
616 32, /* size of l1 cache. */
617 32, /* size of l2 cache. Some models
618 have integrated l2 cache, but
619 optimizing for k6 is not important
620 enough to worry about that. */
621 32, /* size of prefetch block */
622 1, /* number of parallel prefetches */
623 1, /* Branch cost */
624 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
625 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
626 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
627 COSTS_N_INSNS (2), /* cost of FABS instruction. */
628 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
629 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
630 k6_memcpy,
631 k6_memset,
632 1, /* scalar_stmt_cost. */
633 1, /* scalar load_cost. */
634 1, /* scalar_store_cost. */
635 1, /* vec_stmt_cost. */
636 1, /* vec_to_scalar_cost. */
637 1, /* scalar_to_vec_cost. */
638 1, /* vec_align_load_cost. */
639 2, /* vec_unalign_load_cost. */
640 1, /* vec_store_cost. */
641 3, /* cond_taken_branch_cost. */
642 1, /* cond_not_taken_branch_cost. */
643 };
645 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
646 than K8 does.  Alignment becomes important after 8 bytes for memcpy and
647 128 bytes for memset. */
648 static stringop_algs athlon_memcpy[2] = {
649 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
650 DUMMY_STRINGOP_ALGS};
651 static stringop_algs athlon_memset[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static const
655 struct processor_costs athlon_cost = {
656 COSTS_N_INSNS (1), /* cost of an add instruction */
657 COSTS_N_INSNS (2), /* cost of a lea instruction */
658 COSTS_N_INSNS (1), /* variable shift costs */
659 COSTS_N_INSNS (1), /* constant shift costs */
660 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
661 COSTS_N_INSNS (5), /* HI */
662 COSTS_N_INSNS (5), /* SI */
663 COSTS_N_INSNS (5), /* DI */
664 COSTS_N_INSNS (5)}, /* other */
665 0, /* cost of multiply per each bit set */
666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
667 COSTS_N_INSNS (26), /* HI */
668 COSTS_N_INSNS (42), /* SI */
669 COSTS_N_INSNS (74), /* DI */
670 COSTS_N_INSNS (74)}, /* other */
671 COSTS_N_INSNS (1), /* cost of movsx */
672 COSTS_N_INSNS (1), /* cost of movzx */
673 8, /* "large" insn */
674 9, /* MOVE_RATIO */
675 4, /* cost for loading QImode using movzbl */
676 {3, 4, 3}, /* cost of loading integer registers
677 in QImode, HImode and SImode.
678 Relative to reg-reg move (2). */
679 {3, 4, 3}, /* cost of storing integer registers */
680 4, /* cost of reg,reg fld/fst */
681 {4, 4, 12}, /* cost of loading fp registers
682 in SFmode, DFmode and XFmode */
683 {6, 6, 8}, /* cost of storing fp registers
684 in SFmode, DFmode and XFmode */
685 2, /* cost of moving MMX register */
686 {4, 4}, /* cost of loading MMX registers
687 in SImode and DImode */
688 {4, 4}, /* cost of storing MMX registers
689 in SImode and DImode */
690 2, /* cost of moving SSE register */
691 {4, 4, 6}, /* cost of loading SSE registers
692 in SImode, DImode and TImode */
693 {4, 4, 5}, /* cost of storing SSE registers
694 in SImode, DImode and TImode */
695 5, /* MMX or SSE register to integer */
696 64, /* size of l1 cache. */
697 256, /* size of l2 cache. */
698 64, /* size of prefetch block */
699 6, /* number of parallel prefetches */
700 5, /* Branch cost */
701 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (2), /* cost of FABS instruction. */
705 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
707 athlon_memcpy,
708 athlon_memset,
709 1, /* scalar_stmt_cost. */
710 1, /* scalar load_cost. */
711 1, /* scalar_store_cost. */
712 1, /* vec_stmt_cost. */
713 1, /* vec_to_scalar_cost. */
714 1, /* scalar_to_vec_cost. */
715 1, /* vec_align_load_cost. */
716 2, /* vec_unalign_load_cost. */
717 1, /* vec_store_cost. */
718 3, /* cond_taken_branch_cost. */
719 1, /* cond_not_taken_branch_cost. */
720 };
722 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
723 small blocks it is better to use a loop.  For large blocks, a libcall can
724 do nontemporal accesses and beat the inline expansion considerably. */
725 static stringop_algs k8_memcpy[2] = {
726 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
727 {-1, rep_prefix_4_byte, false}}},
728 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
729 {-1, libcall, false}}}};
730 static stringop_algs k8_memset[2] = {
731 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
732 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
733 {libcall, {{48, unrolled_loop, false},
734 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
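/* Worked example under the reading sketched near the top of the file (an
   assumption, not a claim about the expander itself): for a
   compile-time-known 20-byte memcpy on a 32-bit target, k8_memcpy[0]
   applies; 20 is above the {6, loop} and {14, unrolled_loop} limits, so the
   {-1, rep_prefix_4_byte} entry is chosen and a rep movsl based sequence is
   preferred.  */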
735 static const
736 struct processor_costs k8_cost = {
737 COSTS_N_INSNS (1), /* cost of an add instruction */
738 COSTS_N_INSNS (2), /* cost of a lea instruction */
739 COSTS_N_INSNS (1), /* variable shift costs */
740 COSTS_N_INSNS (1), /* constant shift costs */
741 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
742 COSTS_N_INSNS (4), /* HI */
743 COSTS_N_INSNS (3), /* SI */
744 COSTS_N_INSNS (4), /* DI */
745 COSTS_N_INSNS (5)}, /* other */
746 0, /* cost of multiply per each bit set */
747 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
748 COSTS_N_INSNS (26), /* HI */
749 COSTS_N_INSNS (42), /* SI */
750 COSTS_N_INSNS (74), /* DI */
751 COSTS_N_INSNS (74)}, /* other */
752 COSTS_N_INSNS (1), /* cost of movsx */
753 COSTS_N_INSNS (1), /* cost of movzx */
754 8, /* "large" insn */
755 9, /* MOVE_RATIO */
756 4, /* cost for loading QImode using movzbl */
757 {3, 4, 3}, /* cost of loading integer registers
758 in QImode, HImode and SImode.
759 Relative to reg-reg move (2). */
760 {3, 4, 3}, /* cost of storing integer registers */
761 4, /* cost of reg,reg fld/fst */
762 {4, 4, 12}, /* cost of loading fp registers
763 in SFmode, DFmode and XFmode */
764 {6, 6, 8}, /* cost of storing fp registers
765 in SFmode, DFmode and XFmode */
766 2, /* cost of moving MMX register */
767 {3, 3}, /* cost of loading MMX registers
768 in SImode and DImode */
769 {4, 4}, /* cost of storing MMX registers
770 in SImode and DImode */
771 2, /* cost of moving SSE register */
772 {4, 3, 6}, /* cost of loading SSE registers
773 in SImode, DImode and TImode */
774 {4, 4, 5}, /* cost of storing SSE registers
775 in SImode, DImode and TImode */
776 5, /* MMX or SSE register to integer */
777 64, /* size of l1 cache. */
778 512, /* size of l2 cache. */
779 64, /* size of prefetch block */
780 /* New AMD processors never drop prefetches; if they cannot be performed
781 immediately, they are queued.  We set the number of simultaneous prefetches
782 to a large constant to reflect this (it is probably not a good idea to
783 leave the number of prefetches completely unlimited, as their execution
784 also takes some time). */
785 100, /* number of parallel prefetches */
786 3, /* Branch cost */
787 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
788 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
789 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
790 COSTS_N_INSNS (2), /* cost of FABS instruction. */
791 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
792 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
794 k8_memcpy,
795 k8_memset,
796 4, /* scalar_stmt_cost. */
797 2, /* scalar load_cost. */
798 2, /* scalar_store_cost. */
799 5, /* vec_stmt_cost. */
800 0, /* vec_to_scalar_cost. */
801 2, /* scalar_to_vec_cost. */
802 2, /* vec_align_load_cost. */
803 3, /* vec_unalign_load_cost. */
804 3, /* vec_store_cost. */
805 3, /* cond_taken_branch_cost. */
806 2, /* cond_not_taken_branch_cost. */
807 };
809 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
810 very small blocks it is better to use a loop.  For large blocks, a libcall can
811 do nontemporal accesses and beat the inline expansion considerably. */
812 static stringop_algs amdfam10_memcpy[2] = {
813 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
814 {-1, rep_prefix_4_byte, false}}},
815 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
816 {-1, libcall, false}}}};
817 static stringop_algs amdfam10_memset[2] = {
818 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
819 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
820 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
821 {-1, libcall, false}}}};
822 struct processor_costs amdfam10_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (2), /* cost of a lea instruction */
825 COSTS_N_INSNS (1), /* variable shift costs */
826 COSTS_N_INSNS (1), /* constant shift costs */
827 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (4), /* HI */
829 COSTS_N_INSNS (3), /* SI */
830 COSTS_N_INSNS (4), /* DI */
831 COSTS_N_INSNS (5)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (35), /* HI */
835 COSTS_N_INSNS (51), /* SI */
836 COSTS_N_INSNS (83), /* DI */
837 COSTS_N_INSNS (83)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 8, /* "large" insn */
841 9, /* MOVE_RATIO */
842 4, /* cost for loading QImode using movzbl */
843 {3, 4, 3}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {3, 4, 3}, /* cost of storing integer registers */
847 4, /* cost of reg,reg fld/fst */
848 {4, 4, 12}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {6, 6, 8}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {3, 3}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {4, 4}, /* cost of storing MMX registers
856 in SImode and DImode */
857 2, /* cost of moving SSE register */
858 {4, 4, 3}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {4, 4, 5}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 3, /* MMX or SSE register to integer */
863 /* On K8:
864 MOVD reg64, xmmreg Double FSTORE 4
865 MOVD reg32, xmmreg Double FSTORE 4
866 On AMDFAM10:
867 MOVD reg64, xmmreg Double FADD 3
868 1/1 1/1
869 MOVD reg32, xmmreg Double FADD 3
870 1/1 1/1 */
871 64, /* size of l1 cache. */
872 512, /* size of l2 cache. */
873 64, /* size of prefetch block */
874 /* New AMD processors never drop prefetches; if they cannot be performed
875 immediately, they are queued.  We set the number of simultaneous prefetches
876 to a large constant to reflect this (it is probably not a good idea to
877 leave the number of prefetches completely unlimited, as their execution
878 also takes some time). */
879 100, /* number of parallel prefetches */
880 2, /* Branch cost */
881 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
882 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
883 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
884 COSTS_N_INSNS (2), /* cost of FABS instruction. */
885 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
886 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
888 amdfam10_memcpy,
889 amdfam10_memset,
890 4, /* scalar_stmt_cost. */
891 2, /* scalar load_cost. */
892 2, /* scalar_store_cost. */
893 6, /* vec_stmt_cost. */
894 0, /* vec_to_scalar_cost. */
895 2, /* scalar_to_vec_cost. */
896 2, /* vec_align_load_cost. */
897 2, /* vec_unalign_load_cost. */
898 2, /* vec_store_cost. */
899 2, /* cond_taken_branch_cost. */
900 1, /* cond_not_taken_branch_cost. */
901 };
903 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
904 very small blocks it is better to use a loop.  For large blocks, a libcall
905 can do nontemporal accesses and beat the inline expansion considerably. */
906 static stringop_algs bdver1_memcpy[2] = {
907 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
908 {-1, rep_prefix_4_byte, false}}},
909 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
910 {-1, libcall, false}}}};
911 static stringop_algs bdver1_memset[2] = {
912 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
913 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
914 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
915 {-1, libcall, false}}}};
917 const struct processor_costs bdver1_cost = {
918 COSTS_N_INSNS (1), /* cost of an add instruction */
919 COSTS_N_INSNS (1), /* cost of a lea instruction */
920 COSTS_N_INSNS (1), /* variable shift costs */
921 COSTS_N_INSNS (1), /* constant shift costs */
922 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
923 COSTS_N_INSNS (4), /* HI */
924 COSTS_N_INSNS (4), /* SI */
925 COSTS_N_INSNS (6), /* DI */
926 COSTS_N_INSNS (6)}, /* other */
927 0, /* cost of multiply per each bit set */
928 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
929 COSTS_N_INSNS (35), /* HI */
930 COSTS_N_INSNS (51), /* SI */
931 COSTS_N_INSNS (83), /* DI */
932 COSTS_N_INSNS (83)}, /* other */
933 COSTS_N_INSNS (1), /* cost of movsx */
934 COSTS_N_INSNS (1), /* cost of movzx */
935 8, /* "large" insn */
936 9, /* MOVE_RATIO */
937 4, /* cost for loading QImode using movzbl */
938 {5, 5, 4}, /* cost of loading integer registers
939 in QImode, HImode and SImode.
940 Relative to reg-reg move (2). */
941 {4, 4, 4}, /* cost of storing integer registers */
942 2, /* cost of reg,reg fld/fst */
943 {5, 5, 12}, /* cost of loading fp registers
944 in SFmode, DFmode and XFmode */
945 {4, 4, 8}, /* cost of storing fp registers
946 in SFmode, DFmode and XFmode */
947 2, /* cost of moving MMX register */
948 {4, 4}, /* cost of loading MMX registers
949 in SImode and DImode */
950 {4, 4}, /* cost of storing MMX registers
951 in SImode and DImode */
952 2, /* cost of moving SSE register */
953 {4, 4, 4}, /* cost of loading SSE registers
954 in SImode, DImode and TImode */
955 {4, 4, 4}, /* cost of storing SSE registers
956 in SImode, DImode and TImode */
957 2, /* MMX or SSE register to integer */
958 /* On K8:
959 MOVD reg64, xmmreg Double FSTORE 4
960 MOVD reg32, xmmreg Double FSTORE 4
961 On AMDFAM10:
962 MOVD reg64, xmmreg Double FADD 3
963 1/1 1/1
964 MOVD reg32, xmmreg Double FADD 3
965 1/1 1/1 */
966 16, /* size of l1 cache. */
967 2048, /* size of l2 cache. */
968 64, /* size of prefetch block */
969 /* New AMD processors never drop prefetches; if they cannot be performed
970 immediately, they are queued.  We set the number of simultaneous prefetches
971 to a large constant to reflect this (it is probably not a good idea to
972 leave the number of prefetches completely unlimited, as their execution
973 also takes some time). */
974 100, /* number of parallel prefetches */
975 2, /* Branch cost */
976 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
977 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
978 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
979 COSTS_N_INSNS (2), /* cost of FABS instruction. */
980 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
981 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
983 bdver1_memcpy,
984 bdver1_memset,
985 6, /* scalar_stmt_cost. */
986 4, /* scalar load_cost. */
987 4, /* scalar_store_cost. */
988 6, /* vec_stmt_cost. */
989 0, /* vec_to_scalar_cost. */
990 2, /* scalar_to_vec_cost. */
991 4, /* vec_align_load_cost. */
992 4, /* vec_unalign_load_cost. */
993 4, /* vec_store_cost. */
994 2, /* cond_taken_branch_cost. */
995 1, /* cond_not_taken_branch_cost. */
996 };
998 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
999 very small blocks it is better to use a loop.  For large blocks, a libcall
1000 can do nontemporal accesses and beat the inline expansion considerably. */
1002 static stringop_algs bdver2_memcpy[2] = {
1003 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1004 {-1, rep_prefix_4_byte, false}}},
1005 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1006 {-1, libcall, false}}}};
1007 static stringop_algs bdver2_memset[2] = {
1008 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1009 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1010 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1011 {-1, libcall, false}}}};
1013 const struct processor_costs bdver2_cost = {
1014 COSTS_N_INSNS (1), /* cost of an add instruction */
1015 COSTS_N_INSNS (1), /* cost of a lea instruction */
1016 COSTS_N_INSNS (1), /* variable shift costs */
1017 COSTS_N_INSNS (1), /* constant shift costs */
1018 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1019 COSTS_N_INSNS (4), /* HI */
1020 COSTS_N_INSNS (4), /* SI */
1021 COSTS_N_INSNS (6), /* DI */
1022 COSTS_N_INSNS (6)}, /* other */
1023 0, /* cost of multiply per each bit set */
1024 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1025 COSTS_N_INSNS (35), /* HI */
1026 COSTS_N_INSNS (51), /* SI */
1027 COSTS_N_INSNS (83), /* DI */
1028 COSTS_N_INSNS (83)}, /* other */
1029 COSTS_N_INSNS (1), /* cost of movsx */
1030 COSTS_N_INSNS (1), /* cost of movzx */
1031 8, /* "large" insn */
1032 9, /* MOVE_RATIO */
1033 4, /* cost for loading QImode using movzbl */
1034 {5, 5, 4}, /* cost of loading integer registers
1035 in QImode, HImode and SImode.
1036 Relative to reg-reg move (2). */
1037 {4, 4, 4}, /* cost of storing integer registers */
1038 2, /* cost of reg,reg fld/fst */
1039 {5, 5, 12}, /* cost of loading fp registers
1040 in SFmode, DFmode and XFmode */
1041 {4, 4, 8}, /* cost of storing fp registers
1042 in SFmode, DFmode and XFmode */
1043 2, /* cost of moving MMX register */
1044 {4, 4}, /* cost of loading MMX registers
1045 in SImode and DImode */
1046 {4, 4}, /* cost of storing MMX registers
1047 in SImode and DImode */
1048 2, /* cost of moving SSE register */
1049 {4, 4, 4}, /* cost of loading SSE registers
1050 in SImode, DImode and TImode */
1051 {4, 4, 4}, /* cost of storing SSE registers
1052 in SImode, DImode and TImode */
1053 2, /* MMX or SSE register to integer */
1054 /* On K8:
1055 MOVD reg64, xmmreg Double FSTORE 4
1056 MOVD reg32, xmmreg Double FSTORE 4
1057 On AMDFAM10:
1058 MOVD reg64, xmmreg Double FADD 3
1059 1/1 1/1
1060 MOVD reg32, xmmreg Double FADD 3
1061 1/1 1/1 */
1062 16, /* size of l1 cache. */
1063 2048, /* size of l2 cache. */
1064 64, /* size of prefetch block */
1065 /* New AMD processors never drop prefetches; if they cannot be performed
1066 immediately, they are queued.  We set the number of simultaneous prefetches
1067 to a large constant to reflect this (it is probably not a good idea to
1068 leave the number of prefetches completely unlimited, as their execution
1069 also takes some time). */
1070 100, /* number of parallel prefetches */
1071 2, /* Branch cost */
1072 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1073 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1074 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1075 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1076 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1077 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1079 bdver2_memcpy,
1080 bdver2_memset,
1081 6, /* scalar_stmt_cost. */
1082 4, /* scalar load_cost. */
1083 4, /* scalar_store_cost. */
1084 6, /* vec_stmt_cost. */
1085 0, /* vec_to_scalar_cost. */
1086 2, /* scalar_to_vec_cost. */
1087 4, /* vec_align_load_cost. */
1088 4, /* vec_unalign_load_cost. */
1089 4, /* vec_store_cost. */
1090 2, /* cond_taken_branch_cost. */
1091 1, /* cond_not_taken_branch_cost. */
1092 };
1095 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1096 very small blocks it is better to use a loop.  For large blocks, a libcall
1097 can do nontemporal accesses and beat the inline expansion considerably. */
1098 static stringop_algs bdver3_memcpy[2] = {
1099 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1100 {-1, rep_prefix_4_byte, false}}},
1101 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1102 {-1, libcall, false}}}};
1103 static stringop_algs bdver3_memset[2] = {
1104 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1105 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1106 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1107 {-1, libcall, false}}}};
1108 struct processor_costs bdver3_cost = {
1109 COSTS_N_INSNS (1), /* cost of an add instruction */
1110 COSTS_N_INSNS (1), /* cost of a lea instruction */
1111 COSTS_N_INSNS (1), /* variable shift costs */
1112 COSTS_N_INSNS (1), /* constant shift costs */
1113 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1114 COSTS_N_INSNS (4), /* HI */
1115 COSTS_N_INSNS (4), /* SI */
1116 COSTS_N_INSNS (6), /* DI */
1117 COSTS_N_INSNS (6)}, /* other */
1118 0, /* cost of multiply per each bit set */
1119 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1120 COSTS_N_INSNS (35), /* HI */
1121 COSTS_N_INSNS (51), /* SI */
1122 COSTS_N_INSNS (83), /* DI */
1123 COSTS_N_INSNS (83)}, /* other */
1124 COSTS_N_INSNS (1), /* cost of movsx */
1125 COSTS_N_INSNS (1), /* cost of movzx */
1126 8, /* "large" insn */
1127 9, /* MOVE_RATIO */
1128 4, /* cost for loading QImode using movzbl */
1129 {5, 5, 4}, /* cost of loading integer registers
1130 in QImode, HImode and SImode.
1131 Relative to reg-reg move (2). */
1132 {4, 4, 4}, /* cost of storing integer registers */
1133 2, /* cost of reg,reg fld/fst */
1134 {5, 5, 12}, /* cost of loading fp registers
1135 in SFmode, DFmode and XFmode */
1136 {4, 4, 8}, /* cost of storing fp registers
1137 in SFmode, DFmode and XFmode */
1138 2, /* cost of moving MMX register */
1139 {4, 4}, /* cost of loading MMX registers
1140 in SImode and DImode */
1141 {4, 4}, /* cost of storing MMX registers
1142 in SImode and DImode */
1143 2, /* cost of moving SSE register */
1144 {4, 4, 4}, /* cost of loading SSE registers
1145 in SImode, DImode and TImode */
1146 {4, 4, 4}, /* cost of storing SSE registers
1147 in SImode, DImode and TImode */
1148 2, /* MMX or SSE register to integer */
1149 16, /* size of l1 cache. */
1150 2048, /* size of l2 cache. */
1151 64, /* size of prefetch block */
1152 /* New AMD processors never drop prefetches; if they cannot be performed
1153 immediately, they are queued.  We set the number of simultaneous prefetches
1154 to a large constant to reflect this (it is probably not a good idea to
1155 leave the number of prefetches completely unlimited, as their execution
1156 also takes some time). */
1157 100, /* number of parallel prefetches */
1158 2, /* Branch cost */
1159 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1166 bdver3_memcpy,
1167 bdver3_memset,
1168 6, /* scalar_stmt_cost. */
1169 4, /* scalar load_cost. */
1170 4, /* scalar_store_cost. */
1171 6, /* vec_stmt_cost. */
1172 0, /* vec_to_scalar_cost. */
1173 2, /* scalar_to_vec_cost. */
1174 4, /* vec_align_load_cost. */
1175 4, /* vec_unalign_load_cost. */
1176 4, /* vec_store_cost. */
1177 2, /* cond_taken_branch_cost. */
1178 1, /* cond_not_taken_branch_cost. */
1179 };
1181 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1182 very small blocks it is better to use a loop.  For large blocks, a libcall
1183 can do nontemporal accesses and beat the inline expansion considerably. */
1184 static stringop_algs bdver4_memcpy[2] = {
1185 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1186 {-1, rep_prefix_4_byte, false}}},
1187 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1188 {-1, libcall, false}}}};
1189 static stringop_algs bdver4_memset[2] = {
1190 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1191 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1192 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1193 {-1, libcall, false}}}};
1194 struct processor_costs bdver4_cost = {
1195 COSTS_N_INSNS (1), /* cost of an add instruction */
1196 COSTS_N_INSNS (1), /* cost of a lea instruction */
1197 COSTS_N_INSNS (1), /* variable shift costs */
1198 COSTS_N_INSNS (1), /* constant shift costs */
1199 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1200 COSTS_N_INSNS (4), /* HI */
1201 COSTS_N_INSNS (4), /* SI */
1202 COSTS_N_INSNS (6), /* DI */
1203 COSTS_N_INSNS (6)}, /* other */
1204 0, /* cost of multiply per each bit set */
1205 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1206 COSTS_N_INSNS (35), /* HI */
1207 COSTS_N_INSNS (51), /* SI */
1208 COSTS_N_INSNS (83), /* DI */
1209 COSTS_N_INSNS (83)}, /* other */
1210 COSTS_N_INSNS (1), /* cost of movsx */
1211 COSTS_N_INSNS (1), /* cost of movzx */
1212 8, /* "large" insn */
1213 9, /* MOVE_RATIO */
1214 4, /* cost for loading QImode using movzbl */
1215 {5, 5, 4}, /* cost of loading integer registers
1216 in QImode, HImode and SImode.
1217 Relative to reg-reg move (2). */
1218 {4, 4, 4}, /* cost of storing integer registers */
1219 2, /* cost of reg,reg fld/fst */
1220 {5, 5, 12}, /* cost of loading fp registers
1221 in SFmode, DFmode and XFmode */
1222 {4, 4, 8}, /* cost of storing fp registers
1223 in SFmode, DFmode and XFmode */
1224 2, /* cost of moving MMX register */
1225 {4, 4}, /* cost of loading MMX registers
1226 in SImode and DImode */
1227 {4, 4}, /* cost of storing MMX registers
1228 in SImode and DImode */
1229 2, /* cost of moving SSE register */
1230 {4, 4, 4}, /* cost of loading SSE registers
1231 in SImode, DImode and TImode */
1232 {4, 4, 4}, /* cost of storing SSE registers
1233 in SImode, DImode and TImode */
1234 2, /* MMX or SSE register to integer */
1235 16, /* size of l1 cache. */
1236 2048, /* size of l2 cache. */
1237 64, /* size of prefetch block */
1238 /* New AMD processors never drop prefetches; if they cannot be performed
1239 immediately, they are queued.  We set the number of simultaneous prefetches
1240 to a large constant to reflect this (it is probably not a good idea to
1241 leave the number of prefetches completely unlimited, as their execution
1242 also takes some time). */
1243 100, /* number of parallel prefetches */
1244 2, /* Branch cost */
1245 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1246 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1247 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1248 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1249 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1250 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1252 bdver4_memcpy,
1253 bdver4_memset,
1254 6, /* scalar_stmt_cost. */
1255 4, /* scalar load_cost. */
1256 4, /* scalar_store_cost. */
1257 6, /* vec_stmt_cost. */
1258 0, /* vec_to_scalar_cost. */
1259 2, /* scalar_to_vec_cost. */
1260 4, /* vec_align_load_cost. */
1261 4, /* vec_unalign_load_cost. */
1262 4, /* vec_store_cost. */
1263 2, /* cond_taken_branch_cost. */
1264 1, /* cond_not_taken_branch_cost. */
1265 };
1267 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1268 very small blocks it is better to use a loop.  For large blocks, a libcall can
1269 do nontemporal accesses and beat the inline expansion considerably. */
1270 static stringop_algs btver1_memcpy[2] = {
1271 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1272 {-1, rep_prefix_4_byte, false}}},
1273 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1274 {-1, libcall, false}}}};
1275 static stringop_algs btver1_memset[2] = {
1276 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1277 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1278 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1279 {-1, libcall, false}}}};
1280 const struct processor_costs btver1_cost = {
1281 COSTS_N_INSNS (1), /* cost of an add instruction */
1282 COSTS_N_INSNS (2), /* cost of a lea instruction */
1283 COSTS_N_INSNS (1), /* variable shift costs */
1284 COSTS_N_INSNS (1), /* constant shift costs */
1285 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1286 COSTS_N_INSNS (4), /* HI */
1287 COSTS_N_INSNS (3), /* SI */
1288 COSTS_N_INSNS (4), /* DI */
1289 COSTS_N_INSNS (5)}, /* other */
1290 0, /* cost of multiply per each bit set */
1291 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1292 COSTS_N_INSNS (35), /* HI */
1293 COSTS_N_INSNS (51), /* SI */
1294 COSTS_N_INSNS (83), /* DI */
1295 COSTS_N_INSNS (83)}, /* other */
1296 COSTS_N_INSNS (1), /* cost of movsx */
1297 COSTS_N_INSNS (1), /* cost of movzx */
1298 8, /* "large" insn */
1299 9, /* MOVE_RATIO */
1300 4, /* cost for loading QImode using movzbl */
1301 {3, 4, 3}, /* cost of loading integer registers
1302 in QImode, HImode and SImode.
1303 Relative to reg-reg move (2). */
1304 {3, 4, 3}, /* cost of storing integer registers */
1305 4, /* cost of reg,reg fld/fst */
1306 {4, 4, 12}, /* cost of loading fp registers
1307 in SFmode, DFmode and XFmode */
1308 {6, 6, 8}, /* cost of storing fp registers
1309 in SFmode, DFmode and XFmode */
1310 2, /* cost of moving MMX register */
1311 {3, 3}, /* cost of loading MMX registers
1312 in SImode and DImode */
1313 {4, 4}, /* cost of storing MMX registers
1314 in SImode and DImode */
1315 2, /* cost of moving SSE register */
1316 {4, 4, 3}, /* cost of loading SSE registers
1317 in SImode, DImode and TImode */
1318 {4, 4, 5}, /* cost of storing SSE registers
1319 in SImode, DImode and TImode */
1320 3, /* MMX or SSE register to integer */
1321 /* On K8:
1322 MOVD reg64, xmmreg Double FSTORE 4
1323 MOVD reg32, xmmreg Double FSTORE 4
1324 On AMDFAM10:
1325 MOVD reg64, xmmreg Double FADD 3
1326 1/1 1/1
1327 MOVD reg32, xmmreg Double FADD 3
1328 1/1 1/1 */
1329 32, /* size of l1 cache. */
1330 512, /* size of l2 cache. */
1331 64, /* size of prefetch block */
1332 100, /* number of parallel prefetches */
1333 2, /* Branch cost */
1334 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1335 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1336 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1337 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1338 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1339 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1341 btver1_memcpy,
1342 btver1_memset,
1343 4, /* scalar_stmt_cost. */
1344 2, /* scalar load_cost. */
1345 2, /* scalar_store_cost. */
1346 6, /* vec_stmt_cost. */
1347 0, /* vec_to_scalar_cost. */
1348 2, /* scalar_to_vec_cost. */
1349 2, /* vec_align_load_cost. */
1350 2, /* vec_unalign_load_cost. */
1351 2, /* vec_store_cost. */
1352 2, /* cond_taken_branch_cost. */
1353 1, /* cond_not_taken_branch_cost. */
1354 };
1356 static stringop_algs btver2_memcpy[2] = {
1357 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1358 {-1, rep_prefix_4_byte, false}}},
1359 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1360 {-1, libcall, false}}}};
1361 static stringop_algs btver2_memset[2] = {
1362 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1363 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1364 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1365 {-1, libcall, false}}}};
1366 const struct processor_costs btver2_cost = {
1367 COSTS_N_INSNS (1), /* cost of an add instruction */
1368 COSTS_N_INSNS (2), /* cost of a lea instruction */
1369 COSTS_N_INSNS (1), /* variable shift costs */
1370 COSTS_N_INSNS (1), /* constant shift costs */
1371 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1372 COSTS_N_INSNS (4), /* HI */
1373 COSTS_N_INSNS (3), /* SI */
1374 COSTS_N_INSNS (4), /* DI */
1375 COSTS_N_INSNS (5)}, /* other */
1376 0, /* cost of multiply per each bit set */
1377 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1378 COSTS_N_INSNS (35), /* HI */
1379 COSTS_N_INSNS (51), /* SI */
1380 COSTS_N_INSNS (83), /* DI */
1381 COSTS_N_INSNS (83)}, /* other */
1382 COSTS_N_INSNS (1), /* cost of movsx */
1383 COSTS_N_INSNS (1), /* cost of movzx */
1384 8, /* "large" insn */
1385 9, /* MOVE_RATIO */
1386 4, /* cost for loading QImode using movzbl */
1387 {3, 4, 3}, /* cost of loading integer registers
1388 in QImode, HImode and SImode.
1389 Relative to reg-reg move (2). */
1390 {3, 4, 3}, /* cost of storing integer registers */
1391 4, /* cost of reg,reg fld/fst */
1392 {4, 4, 12}, /* cost of loading fp registers
1393 in SFmode, DFmode and XFmode */
1394 {6, 6, 8}, /* cost of storing fp registers
1395 in SFmode, DFmode and XFmode */
1396 2, /* cost of moving MMX register */
1397 {3, 3}, /* cost of loading MMX registers
1398 in SImode and DImode */
1399 {4, 4}, /* cost of storing MMX registers
1400 in SImode and DImode */
1401 2, /* cost of moving SSE register */
1402 {4, 4, 3}, /* cost of loading SSE registers
1403 in SImode, DImode and TImode */
1404 {4, 4, 5}, /* cost of storing SSE registers
1405 in SImode, DImode and TImode */
1406 3, /* MMX or SSE register to integer */
1407 /* On K8:
1408 MOVD reg64, xmmreg Double FSTORE 4
1409 MOVD reg32, xmmreg Double FSTORE 4
1410 On AMDFAM10:
1411 MOVD reg64, xmmreg Double FADD 3
1412 1/1 1/1
1413 MOVD reg32, xmmreg Double FADD 3
1414 1/1 1/1 */
1415 32, /* size of l1 cache. */
1416 2048, /* size of l2 cache. */
1417 64, /* size of prefetch block */
1418 100, /* number of parallel prefetches */
1419 2, /* Branch cost */
1420 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1421 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1422 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1423 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1424 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1425 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1426 btver2_memcpy,
1427 btver2_memset,
1428 4, /* scalar_stmt_cost. */
1429 2, /* scalar load_cost. */
1430 2, /* scalar_store_cost. */
1431 6, /* vec_stmt_cost. */
1432 0, /* vec_to_scalar_cost. */
1433 2, /* scalar_to_vec_cost. */
1434 2, /* vec_align_load_cost. */
1435 2, /* vec_unalign_load_cost. */
1436 2, /* vec_store_cost. */
1437 2, /* cond_taken_branch_cost. */
1438 1, /* cond_not_taken_branch_cost. */
1441 static stringop_algs pentium4_memcpy[2] = {
1442 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1443 DUMMY_STRINGOP_ALGS};
1444 static stringop_algs pentium4_memset[2] = {
1445 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1446 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1447 DUMMY_STRINGOP_ALGS};
1449 static const
1450 struct processor_costs pentium4_cost = {
1451 COSTS_N_INSNS (1), /* cost of an add instruction */
1452 COSTS_N_INSNS (3), /* cost of a lea instruction */
1453 COSTS_N_INSNS (4), /* variable shift costs */
1454 COSTS_N_INSNS (4), /* constant shift costs */
1455 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1456 COSTS_N_INSNS (15), /* HI */
1457 COSTS_N_INSNS (15), /* SI */
1458 COSTS_N_INSNS (15), /* DI */
1459 COSTS_N_INSNS (15)}, /* other */
1460 0, /* cost of multiply per each bit set */
1461 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1462 COSTS_N_INSNS (56), /* HI */
1463 COSTS_N_INSNS (56), /* SI */
1464 COSTS_N_INSNS (56), /* DI */
1465 COSTS_N_INSNS (56)}, /* other */
1466 COSTS_N_INSNS (1), /* cost of movsx */
1467 COSTS_N_INSNS (1), /* cost of movzx */
1468 16, /* "large" insn */
1469 6, /* MOVE_RATIO */
1470 2, /* cost for loading QImode using movzbl */
1471 {4, 5, 4}, /* cost of loading integer registers
1472 in QImode, HImode and SImode.
1473 Relative to reg-reg move (2). */
1474 {2, 3, 2}, /* cost of storing integer registers */
1475 2, /* cost of reg,reg fld/fst */
1476 {2, 2, 6}, /* cost of loading fp registers
1477 in SFmode, DFmode and XFmode */
1478 {4, 4, 6}, /* cost of storing fp registers
1479 in SFmode, DFmode and XFmode */
1480 2, /* cost of moving MMX register */
1481 {2, 2}, /* cost of loading MMX registers
1482 in SImode and DImode */
1483 {2, 2}, /* cost of storing MMX registers
1484 in SImode and DImode */
1485 12, /* cost of moving SSE register */
1486 {12, 12, 12}, /* cost of loading SSE registers
1487 in SImode, DImode and TImode */
1488 {2, 2, 8}, /* cost of storing SSE registers
1489 in SImode, DImode and TImode */
1490 10, /* MMX or SSE register to integer */
1491 8, /* size of l1 cache. */
1492 256, /* size of l2 cache. */
1493 64, /* size of prefetch block */
1494 6, /* number of parallel prefetches */
1495 2, /* Branch cost */
1496 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1497 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1498 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1499 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1500 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1501 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1502 pentium4_memcpy,
1503 pentium4_memset,
1504 1, /* scalar_stmt_cost. */
1505 1, /* scalar load_cost. */
1506 1, /* scalar_store_cost. */
1507 1, /* vec_stmt_cost. */
1508 1, /* vec_to_scalar_cost. */
1509 1, /* scalar_to_vec_cost. */
1510 1, /* vec_align_load_cost. */
1511 2, /* vec_unalign_load_cost. */
1512 1, /* vec_store_cost. */
1513 3, /* cond_taken_branch_cost. */
1514 1, /* cond_not_taken_branch_cost. */
1517 static stringop_algs nocona_memcpy[2] = {
1518 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1519 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1520 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1522 static stringop_algs nocona_memset[2] = {
1523 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1524 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1525 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1526 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1528 static const
1529 struct processor_costs nocona_cost = {
1530 COSTS_N_INSNS (1), /* cost of an add instruction */
1531 COSTS_N_INSNS (1), /* cost of a lea instruction */
1532 COSTS_N_INSNS (1), /* variable shift costs */
1533 COSTS_N_INSNS (1), /* constant shift costs */
1534 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1535 COSTS_N_INSNS (10), /* HI */
1536 COSTS_N_INSNS (10), /* SI */
1537 COSTS_N_INSNS (10), /* DI */
1538 COSTS_N_INSNS (10)}, /* other */
1539 0, /* cost of multiply per each bit set */
1540 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1541 COSTS_N_INSNS (66), /* HI */
1542 COSTS_N_INSNS (66), /* SI */
1543 COSTS_N_INSNS (66), /* DI */
1544 COSTS_N_INSNS (66)}, /* other */
1545 COSTS_N_INSNS (1), /* cost of movsx */
1546 COSTS_N_INSNS (1), /* cost of movzx */
1547 16, /* "large" insn */
1548 17, /* MOVE_RATIO */
1549 4, /* cost for loading QImode using movzbl */
1550 {4, 4, 4}, /* cost of loading integer registers
1551 in QImode, HImode and SImode.
1552 Relative to reg-reg move (2). */
1553 {4, 4, 4}, /* cost of storing integer registers */
1554 3, /* cost of reg,reg fld/fst */
1555 {12, 12, 12}, /* cost of loading fp registers
1556 in SFmode, DFmode and XFmode */
1557 {4, 4, 4}, /* cost of storing fp registers
1558 in SFmode, DFmode and XFmode */
1559 6, /* cost of moving MMX register */
1560 {12, 12}, /* cost of loading MMX registers
1561 in SImode and DImode */
1562 {12, 12}, /* cost of storing MMX registers
1563 in SImode and DImode */
1564 6, /* cost of moving SSE register */
1565 {12, 12, 12}, /* cost of loading SSE registers
1566 in SImode, DImode and TImode */
1567 {12, 12, 12}, /* cost of storing SSE registers
1568 in SImode, DImode and TImode */
1569 8, /* MMX or SSE register to integer */
1570 8, /* size of l1 cache. */
1571 1024, /* size of l2 cache. */
1572 64, /* size of prefetch block */
1573 8, /* number of parallel prefetches */
1574 1, /* Branch cost */
1575 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1576 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1577 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1578 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1579 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1580 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1581 nocona_memcpy,
1582 nocona_memset,
1583 1, /* scalar_stmt_cost. */
1584 1, /* scalar load_cost. */
1585 1, /* scalar_store_cost. */
1586 1, /* vec_stmt_cost. */
1587 1, /* vec_to_scalar_cost. */
1588 1, /* scalar_to_vec_cost. */
1589 1, /* vec_align_load_cost. */
1590 2, /* vec_unalign_load_cost. */
1591 1, /* vec_store_cost. */
1592 3, /* cond_taken_branch_cost. */
1593 1, /* cond_not_taken_branch_cost. */
1596 static stringop_algs atom_memcpy[2] = {
1597 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1598 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1599 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1600 static stringop_algs atom_memset[2] = {
1601 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1602 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1603 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1604 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1605 static const
1606 struct processor_costs atom_cost = {
1607 COSTS_N_INSNS (1), /* cost of an add instruction */
1608 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1609 COSTS_N_INSNS (1), /* variable shift costs */
1610 COSTS_N_INSNS (1), /* constant shift costs */
1611 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1612 COSTS_N_INSNS (4), /* HI */
1613 COSTS_N_INSNS (3), /* SI */
1614 COSTS_N_INSNS (4), /* DI */
1615 COSTS_N_INSNS (2)}, /* other */
1616 0, /* cost of multiply per each bit set */
1617 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1618 COSTS_N_INSNS (26), /* HI */
1619 COSTS_N_INSNS (42), /* SI */
1620 COSTS_N_INSNS (74), /* DI */
1621 COSTS_N_INSNS (74)}, /* other */
1622 COSTS_N_INSNS (1), /* cost of movsx */
1623 COSTS_N_INSNS (1), /* cost of movzx */
1624 8, /* "large" insn */
1625 17, /* MOVE_RATIO */
1626 4, /* cost for loading QImode using movzbl */
1627 {4, 4, 4}, /* cost of loading integer registers
1628 in QImode, HImode and SImode.
1629 Relative to reg-reg move (2). */
1630 {4, 4, 4}, /* cost of storing integer registers */
1631 4, /* cost of reg,reg fld/fst */
1632 {12, 12, 12}, /* cost of loading fp registers
1633 in SFmode, DFmode and XFmode */
1634 {6, 6, 8}, /* cost of storing fp registers
1635 in SFmode, DFmode and XFmode */
1636 2, /* cost of moving MMX register */
1637 {8, 8}, /* cost of loading MMX registers
1638 in SImode and DImode */
1639 {8, 8}, /* cost of storing MMX registers
1640 in SImode and DImode */
1641 2, /* cost of moving SSE register */
1642 {8, 8, 8}, /* cost of loading SSE registers
1643 in SImode, DImode and TImode */
1644 {8, 8, 8}, /* cost of storing SSE registers
1645 in SImode, DImode and TImode */
1646 5, /* MMX or SSE register to integer */
1647 32, /* size of l1 cache. */
1648 256, /* size of l2 cache. */
1649 64, /* size of prefetch block */
1650 6, /* number of parallel prefetches */
1651 3, /* Branch cost */
1652 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1653 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1654 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1655 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1656 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1657 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1658 atom_memcpy,
1659 atom_memset,
1660 1, /* scalar_stmt_cost. */
1661 1, /* scalar load_cost. */
1662 1, /* scalar_store_cost. */
1663 1, /* vec_stmt_cost. */
1664 1, /* vec_to_scalar_cost. */
1665 1, /* scalar_to_vec_cost. */
1666 1, /* vec_align_load_cost. */
1667 2, /* vec_unalign_load_cost. */
1668 1, /* vec_store_cost. */
1669 3, /* cond_taken_branch_cost. */
1670 1, /* cond_not_taken_branch_cost. */
1673 static stringop_algs slm_memcpy[2] = {
1674 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1675 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1676 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1677 static stringop_algs slm_memset[2] = {
1678 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1679 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1680 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1681 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1682 static const
1683 struct processor_costs slm_cost = {
1684 COSTS_N_INSNS (1), /* cost of an add instruction */
1685 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1686 COSTS_N_INSNS (1), /* variable shift costs */
1687 COSTS_N_INSNS (1), /* constant shift costs */
1688 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1689 COSTS_N_INSNS (3), /* HI */
1690 COSTS_N_INSNS (3), /* SI */
1691 COSTS_N_INSNS (4), /* DI */
1692 COSTS_N_INSNS (2)}, /* other */
1693 0, /* cost of multiply per each bit set */
1694 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1695 COSTS_N_INSNS (26), /* HI */
1696 COSTS_N_INSNS (42), /* SI */
1697 COSTS_N_INSNS (74), /* DI */
1698 COSTS_N_INSNS (74)}, /* other */
1699 COSTS_N_INSNS (1), /* cost of movsx */
1700 COSTS_N_INSNS (1), /* cost of movzx */
1701 8, /* "large" insn */
1702 17, /* MOVE_RATIO */
1703 4, /* cost for loading QImode using movzbl */
1704 {4, 4, 4}, /* cost of loading integer registers
1705 in QImode, HImode and SImode.
1706 Relative to reg-reg move (2). */
1707 {4, 4, 4}, /* cost of storing integer registers */
1708 4, /* cost of reg,reg fld/fst */
1709 {12, 12, 12}, /* cost of loading fp registers
1710 in SFmode, DFmode and XFmode */
1711 {6, 6, 8}, /* cost of storing fp registers
1712 in SFmode, DFmode and XFmode */
1713 2, /* cost of moving MMX register */
1714 {8, 8}, /* cost of loading MMX registers
1715 in SImode and DImode */
1716 {8, 8}, /* cost of storing MMX registers
1717 in SImode and DImode */
1718 2, /* cost of moving SSE register */
1719 {8, 8, 8}, /* cost of loading SSE registers
1720 in SImode, DImode and TImode */
1721 {8, 8, 8}, /* cost of storing SSE registers
1722 in SImode, DImode and TImode */
1723 5, /* MMX or SSE register to integer */
1724 32, /* size of l1 cache. */
1725 256, /* size of l2 cache. */
1726 64, /* size of prefetch block */
1727 6, /* number of parallel prefetches */
1728 3, /* Branch cost */
1729 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1730 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1731 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1732 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1733 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1734 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1735 slm_memcpy,
1736 slm_memset,
1737 1, /* scalar_stmt_cost. */
1738 1, /* scalar load_cost. */
1739 1, /* scalar_store_cost. */
1740 1, /* vec_stmt_cost. */
1741 1, /* vec_to_scalar_cost. */
1742 1, /* scalar_to_vec_cost. */
1743 1, /* vec_align_load_cost. */
1744 2, /* vec_unalign_load_cost. */
1745 1, /* vec_store_cost. */
1746 3, /* cond_taken_branch_cost. */
1747 1, /* cond_not_taken_branch_cost. */
1750 static stringop_algs intel_memcpy[2] = {
1751 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1752 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1753 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1754 static stringop_algs intel_memset[2] = {
1755 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1756 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1757 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1758 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1759 static const
1760 struct processor_costs intel_cost = {
1761 COSTS_N_INSNS (1), /* cost of an add instruction */
1762 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1763 COSTS_N_INSNS (1), /* variable shift costs */
1764 COSTS_N_INSNS (1), /* constant shift costs */
1765 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1766 COSTS_N_INSNS (3), /* HI */
1767 COSTS_N_INSNS (3), /* SI */
1768 COSTS_N_INSNS (4), /* DI */
1769 COSTS_N_INSNS (2)}, /* other */
1770 0, /* cost of multiply per each bit set */
1771 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1772 COSTS_N_INSNS (26), /* HI */
1773 COSTS_N_INSNS (42), /* SI */
1774 COSTS_N_INSNS (74), /* DI */
1775 COSTS_N_INSNS (74)}, /* other */
1776 COSTS_N_INSNS (1), /* cost of movsx */
1777 COSTS_N_INSNS (1), /* cost of movzx */
1778 8, /* "large" insn */
1779 17, /* MOVE_RATIO */
1780 4, /* cost for loading QImode using movzbl */
1781 {4, 4, 4}, /* cost of loading integer registers
1782 in QImode, HImode and SImode.
1783 Relative to reg-reg move (2). */
1784 {4, 4, 4}, /* cost of storing integer registers */
1785 4, /* cost of reg,reg fld/fst */
1786 {12, 12, 12}, /* cost of loading fp registers
1787 in SFmode, DFmode and XFmode */
1788 {6, 6, 8}, /* cost of storing fp registers
1789 in SFmode, DFmode and XFmode */
1790 2, /* cost of moving MMX register */
1791 {8, 8}, /* cost of loading MMX registers
1792 in SImode and DImode */
1793 {8, 8}, /* cost of storing MMX registers
1794 in SImode and DImode */
1795 2, /* cost of moving SSE register */
1796 {8, 8, 8}, /* cost of loading SSE registers
1797 in SImode, DImode and TImode */
1798 {8, 8, 8}, /* cost of storing SSE registers
1799 in SImode, DImode and TImode */
1800 5, /* MMX or SSE register to integer */
1801 32, /* size of l1 cache. */
1802 256, /* size of l2 cache. */
1803 64, /* size of prefetch block */
1804 6, /* number of parallel prefetches */
1805 3, /* Branch cost */
1806 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1807 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1808 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1809 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1810 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1811 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1812 intel_memcpy,
1813 intel_memset,
1814 1, /* scalar_stmt_cost. */
1815 1, /* scalar load_cost. */
1816 1, /* scalar_store_cost. */
1817 1, /* vec_stmt_cost. */
1818 1, /* vec_to_scalar_cost. */
1819 1, /* scalar_to_vec_cost. */
1820 1, /* vec_align_load_cost. */
1821 2, /* vec_unalign_load_cost. */
1822 1, /* vec_store_cost. */
1823 3, /* cond_taken_branch_cost. */
1824 1, /* cond_not_taken_branch_cost. */
1827 /* Generic should produce code tuned for Core-i7 (and newer chips)
1828 and btver1 (and newer chips). */
1830 static stringop_algs generic_memcpy[2] = {
1831 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1832 {-1, libcall, false}}},
1833 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1834 {-1, libcall, false}}}};
1835 static stringop_algs generic_memset[2] = {
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1837 {-1, libcall, false}}},
1838 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1839 {-1, libcall, false}}}};
1840 static const
1841 struct processor_costs generic_cost = {
1842 COSTS_N_INSNS (1), /* cost of an add instruction */
1843 /* On all chips taken into consideration lea is 2 cycles or more. With
1844 this cost, however, our current implementation of synth_mult results in
1845 the use of unnecessary temporary registers, causing regressions on several
1846 SPECfp benchmarks. */
1847 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1848 COSTS_N_INSNS (1), /* variable shift costs */
1849 COSTS_N_INSNS (1), /* constant shift costs */
1850 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1851 COSTS_N_INSNS (4), /* HI */
1852 COSTS_N_INSNS (3), /* SI */
1853 COSTS_N_INSNS (4), /* DI */
1854 COSTS_N_INSNS (2)}, /* other */
1855 0, /* cost of multiply per each bit set */
1856 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1857 COSTS_N_INSNS (26), /* HI */
1858 COSTS_N_INSNS (42), /* SI */
1859 COSTS_N_INSNS (74), /* DI */
1860 COSTS_N_INSNS (74)}, /* other */
1861 COSTS_N_INSNS (1), /* cost of movsx */
1862 COSTS_N_INSNS (1), /* cost of movzx */
1863 8, /* "large" insn */
1864 17, /* MOVE_RATIO */
1865 4, /* cost for loading QImode using movzbl */
1866 {4, 4, 4}, /* cost of loading integer registers
1867 in QImode, HImode and SImode.
1868 Relative to reg-reg move (2). */
1869 {4, 4, 4}, /* cost of storing integer registers */
1870 4, /* cost of reg,reg fld/fst */
1871 {12, 12, 12}, /* cost of loading fp registers
1872 in SFmode, DFmode and XFmode */
1873 {6, 6, 8}, /* cost of storing fp registers
1874 in SFmode, DFmode and XFmode */
1875 2, /* cost of moving MMX register */
1876 {8, 8}, /* cost of loading MMX registers
1877 in SImode and DImode */
1878 {8, 8}, /* cost of storing MMX registers
1879 in SImode and DImode */
1880 2, /* cost of moving SSE register */
1881 {8, 8, 8}, /* cost of loading SSE registers
1882 in SImode, DImode and TImode */
1883 {8, 8, 8}, /* cost of storing SSE registers
1884 in SImode, DImode and TImode */
1885 5, /* MMX or SSE register to integer */
1886 32, /* size of l1 cache. */
1887 512, /* size of l2 cache. */
1888 64, /* size of prefetch block */
1889 6, /* number of parallel prefetches */
1890 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1891 value is increased to the perhaps more appropriate value of 5. */
1892 3, /* Branch cost */
1893 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1894 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1895 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1896 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1897 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1898 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1899 generic_memcpy,
1900 generic_memset,
1901 1, /* scalar_stmt_cost. */
1902 1, /* scalar load_cost. */
1903 1, /* scalar_store_cost. */
1904 1, /* vec_stmt_cost. */
1905 1, /* vec_to_scalar_cost. */
1906 1, /* scalar_to_vec_cost. */
1907 1, /* vec_align_load_cost. */
1908 2, /* vec_unalign_load_cost. */
1909 1, /* vec_store_cost. */
1910 3, /* cond_taken_branch_cost. */
1911 1, /* cond_not_taken_branch_cost. */
1914 /* core_cost should produce code tuned for the Core family of CPUs. */
1915 static stringop_algs core_memcpy[2] = {
1916 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1917 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1918 {-1, libcall, false}}}};
1919 static stringop_algs core_memset[2] = {
1920 {libcall, {{6, loop_1_byte, true},
1921 {24, loop, true},
1922 {8192, rep_prefix_4_byte, true},
1923 {-1, libcall, false}}},
1924 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1925 {-1, libcall, false}}}};
1927 static const
1928 struct processor_costs core_cost = {
1929 COSTS_N_INSNS (1), /* cost of an add instruction */
1930 /* On all chips taken into consideration lea is 2 cycles or more. With
1931 this cost, however, our current implementation of synth_mult results in
1932 the use of unnecessary temporary registers, causing regressions on several
1933 SPECfp benchmarks. */
1934 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1935 COSTS_N_INSNS (1), /* variable shift costs */
1936 COSTS_N_INSNS (1), /* constant shift costs */
1937 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1938 COSTS_N_INSNS (4), /* HI */
1939 COSTS_N_INSNS (3), /* SI */
1940 COSTS_N_INSNS (4), /* DI */
1941 COSTS_N_INSNS (2)}, /* other */
1942 0, /* cost of multiply per each bit set */
1943 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1944 COSTS_N_INSNS (26), /* HI */
1945 COSTS_N_INSNS (42), /* SI */
1946 COSTS_N_INSNS (74), /* DI */
1947 COSTS_N_INSNS (74)}, /* other */
1948 COSTS_N_INSNS (1), /* cost of movsx */
1949 COSTS_N_INSNS (1), /* cost of movzx */
1950 8, /* "large" insn */
1951 17, /* MOVE_RATIO */
1952 4, /* cost for loading QImode using movzbl */
1953 {4, 4, 4}, /* cost of loading integer registers
1954 in QImode, HImode and SImode.
1955 Relative to reg-reg move (2). */
1956 {4, 4, 4}, /* cost of storing integer registers */
1957 4, /* cost of reg,reg fld/fst */
1958 {12, 12, 12}, /* cost of loading fp registers
1959 in SFmode, DFmode and XFmode */
1960 {6, 6, 8}, /* cost of storing fp registers
1961 in SFmode, DFmode and XFmode */
1962 2, /* cost of moving MMX register */
1963 {8, 8}, /* cost of loading MMX registers
1964 in SImode and DImode */
1965 {8, 8}, /* cost of storing MMX registers
1966 in SImode and DImode */
1967 2, /* cost of moving SSE register */
1968 {8, 8, 8}, /* cost of loading SSE registers
1969 in SImode, DImode and TImode */
1970 {8, 8, 8}, /* cost of storing SSE registers
1971 in SImode, DImode and TImode */
1972 5, /* MMX or SSE register to integer */
1973 64, /* size of l1 cache. */
1974 512, /* size of l2 cache. */
1975 64, /* size of prefetch block */
1976 6, /* number of parallel prefetches */
1977 /* FIXME perhaps more appropriate value is 5. */
1978 3, /* Branch cost */
1979 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1980 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1981 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1982 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1983 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1984 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1985 core_memcpy,
1986 core_memset,
1987 1, /* scalar_stmt_cost. */
1988 1, /* scalar load_cost. */
1989 1, /* scalar_store_cost. */
1990 1, /* vec_stmt_cost. */
1991 1, /* vec_to_scalar_cost. */
1992 1, /* scalar_to_vec_cost. */
1993 1, /* vec_align_load_cost. */
1994 2, /* vec_unalign_load_cost. */
1995 1, /* vec_store_cost. */
1996 3, /* cond_taken_branch_cost. */
1997 1, /* cond_not_taken_branch_cost. */
2001 /* Set by -mtune. */
2002 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2004 /* Set by -mtune or -Os. */
2005 const struct processor_costs *ix86_cost = &pentium_cost;
2007 /* Processor feature/optimization bitmasks. */
2008 #define m_386 (1<<PROCESSOR_I386)
2009 #define m_486 (1<<PROCESSOR_I486)
2010 #define m_PENT (1<<PROCESSOR_PENTIUM)
2011 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2012 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2013 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2014 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2015 #define m_CORE2 (1<<PROCESSOR_CORE2)
2016 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2017 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2018 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2019 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2020 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2021 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2022 #define m_INTEL (1<<PROCESSOR_INTEL)
2024 #define m_GEODE (1<<PROCESSOR_GEODE)
2025 #define m_K6 (1<<PROCESSOR_K6)
2026 #define m_K6_GEODE (m_K6 | m_GEODE)
2027 #define m_K8 (1<<PROCESSOR_K8)
2028 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2029 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2030 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2031 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2032 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2033 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2034 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2035 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2036 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2037 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2038 #define m_BTVER (m_BTVER1 | m_BTVER2)
2039 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2041 #define m_GENERIC (1<<PROCESSOR_GENERIC)
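/* Illustration (not part of the original sources): a tuning selector such
   as (m_CORE_ALL | m_GENERIC) enables a feature for every processor whose
   bit is set; set_ix86_tune_features below tests each selector against
   (1u << ix86_tune), so e.g. -mtune=haswell matches through m_HASWELL,
   which is part of m_CORE_ALL.  */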
2043 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2044 #undef DEF_TUNE
2045 #define DEF_TUNE(tune, name, selector) name,
2046 #include "x86-tune.def"
2047 #undef DEF_TUNE
2050 /* Feature tests against the various tunings. */
2051 unsigned char ix86_tune_features[X86_TUNE_LAST];
2053 /* Feature tests against the various tunings used to create ix86_tune_features
2054 based on the processor mask. */
2055 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2056 #undef DEF_TUNE
2057 #define DEF_TUNE(tune, name, selector) selector,
2058 #include "x86-tune.def"
2059 #undef DEF_TUNE
2062 /* Feature tests against the various architecture variations. */
2063 unsigned char ix86_arch_features[X86_ARCH_LAST];
2065 /* Feature tests against the various architecture variations, used to create
2066 ix86_arch_features based on the processor mask. */
2067 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2068 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2069 ~(m_386 | m_486 | m_PENT | m_K6),
2071 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2072 ~m_386,
2074 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2075 ~(m_386 | m_486),
2077 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2078 ~m_386,
2080 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2081 ~m_386,
2084 /* In case the average insn count for single function invocation is
2085 lower than this constant, emit fast (but longer) prologue and
2086 epilogue code. */
2087 #define FAST_PROLOGUE_INSN_COUNT 20
2089 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2090 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2091 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2092 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2094 /* Array of the smallest class containing reg number REGNO, indexed by
2095 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2097 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2099 /* ax, dx, cx, bx */
2100 AREG, DREG, CREG, BREG,
2101 /* si, di, bp, sp */
2102 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2103 /* FP registers */
2104 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2105 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2106 /* arg pointer */
2107 NON_Q_REGS,
2108 /* flags, fpsr, fpcr, frame */
2109 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2110 /* SSE registers */
2111 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2112 SSE_REGS, SSE_REGS,
2113 /* MMX registers */
2114 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2115 MMX_REGS, MMX_REGS,
2116 /* REX registers */
2117 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2118 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2119 /* SSE REX registers */
2120 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2121 SSE_REGS, SSE_REGS,
2122 /* AVX-512 SSE registers */
2123 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2124 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2125 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 /* Mask registers. */
2128 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2129 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 /* The "default" register map used in 32bit mode. */
2134 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2136 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2137 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2138 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2139 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2140 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2141 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2142 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2143 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2145 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2148 /* The "default" register map used in 64bit mode. */
2150 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2152 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2153 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2154 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2155 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2156 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2157 8,9,10,11,12,13,14,15, /* extended integer registers */
2158 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2159 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2160 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2161 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2164 /* Define the register numbers to be used in Dwarf debugging information.
2165 The SVR4 reference port C compiler uses the following register numbers
2166 in its Dwarf output code:
2167 0 for %eax (gcc regno = 0)
2168 1 for %ecx (gcc regno = 2)
2169 2 for %edx (gcc regno = 1)
2170 3 for %ebx (gcc regno = 3)
2171 4 for %esp (gcc regno = 7)
2172 5 for %ebp (gcc regno = 6)
2173 6 for %esi (gcc regno = 4)
2174 7 for %edi (gcc regno = 5)
2175 The following three DWARF register numbers are never generated by
2176 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2177 believes these numbers have these meanings.
2178 8 for %eip (no gcc equivalent)
2179 9 for %eflags (gcc regno = 17)
2180 10 for %trapno (no gcc equivalent)
2181 It is not at all clear how we should number the FP stack registers
2182 for the x86 architecture. If the version of SDB on x86/svr4 were
2183 a bit less brain dead with respect to floating-point then we would
2184 have a precedent to follow with respect to DWARF register numbers
2185 for x86 FP registers, but the SDB on x86/svr4 is so completely
2186 broken with respect to FP registers that it is hardly worth thinking
2187 of it as something to strive for compatibility with.
2188 The version of x86/svr4 SDB I have at the moment does (partially)
2189 seem to believe that DWARF register number 11 is associated with
2190 the x86 register %st(0), but that's about all. Higher DWARF
2191 register numbers don't seem to be associated with anything in
2192 particular, and even for DWARF regno 11, SDB only seems to under-
2193 stand that it should say that a variable lives in %st(0) (when
2194 asked via an `=' command) if we said it was in DWARF regno 11,
2195 but SDB still prints garbage when asked for the value of the
2196 variable in question (via a `/' command).
2197 (Also note that the labels SDB prints for various FP stack regs
2198 when doing an `x' command are all wrong.)
2199 Note that these problems generally don't affect the native SVR4
2200 C compiler because it doesn't allow the use of -O with -g and
2201 because when it is *not* optimizing, it allocates a memory
2202 location for each floating-point variable, and the memory
2203 location is what gets described in the DWARF AT_location
2204 attribute for the variable in question.
2205 Regardless of the severe mental illness of the x86/svr4 SDB, we
2206 do something sensible here and we use the following DWARF
2207 register numbers. Note that these are all stack-top-relative
2208 numbers.
2209 11 for %st(0) (gcc regno = 8)
2210 12 for %st(1) (gcc regno = 9)
2211 13 for %st(2) (gcc regno = 10)
2212 14 for %st(3) (gcc regno = 11)
2213 15 for %st(4) (gcc regno = 12)
2214 16 for %st(5) (gcc regno = 13)
2215 17 for %st(6) (gcc regno = 14)
2216 18 for %st(7) (gcc regno = 15)
2218 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2220 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2221 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2222 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2223 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2224 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2225 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2226 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2227 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2229 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2232 /* Define parameter passing and return registers. */
2234 static int const x86_64_int_parameter_registers[6] =
2236 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2239 static int const x86_64_ms_abi_int_parameter_registers[4] =
2241 CX_REG, DX_REG, R8_REG, R9_REG
2244 static int const x86_64_int_return_registers[4] =
2246 AX_REG, DX_REG, DI_REG, SI_REG
2249 /* Additional registers that are clobbered by SYSV calls. */
2251 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2253 SI_REG, DI_REG,
2254 XMM6_REG, XMM7_REG,
2255 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2256 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2259 /* Define the structure for the machine field in struct function. */
2261 struct GTY(()) stack_local_entry {
2262 unsigned short mode;
2263 unsigned short n;
2264 rtx rtl;
2265 struct stack_local_entry *next;
2268 /* Structure describing stack frame layout.
2269 Stack grows downward:
2271 [arguments]
2272 <- ARG_POINTER
2273 saved pc
2275 saved static chain if ix86_static_chain_on_stack
2277 saved frame pointer if frame_pointer_needed
2278 <- HARD_FRAME_POINTER
2279 [saved regs]
2280 <- regs_save_offset
2281 [padding0]
2283 [saved SSE regs]
2284 <- sse_regs_save_offset
2285 [padding1] |
2286 | <- FRAME_POINTER
2287 [va_arg registers] |
2289 [frame] |
2291 [padding2] | = to_allocate
2292 <- STACK_POINTER
2294 struct ix86_frame
2296 int nsseregs;
2297 int nregs;
2298 int va_arg_size;
2299 int red_zone_size;
2300 int outgoing_arguments_size;
2302 /* The offsets relative to ARG_POINTER. */
2303 HOST_WIDE_INT frame_pointer_offset;
2304 HOST_WIDE_INT hard_frame_pointer_offset;
2305 HOST_WIDE_INT stack_pointer_offset;
2306 HOST_WIDE_INT hfp_save_offset;
2307 HOST_WIDE_INT reg_save_offset;
2308 HOST_WIDE_INT sse_reg_save_offset;
2310 /* When save_regs_using_mov is set, emit prologue using
2311 move instead of push instructions. */
2312 bool save_regs_using_mov;
2315 /* Which cpu are we scheduling for. */
2316 enum attr_cpu ix86_schedule;
2318 /* Which cpu are we optimizing for. */
2319 enum processor_type ix86_tune;
2321 /* Which instruction set architecture to use. */
2322 enum processor_type ix86_arch;
2324 /* True if processor has SSE prefetch instruction. */
2325 unsigned char x86_prefetch_sse;
2327 /* -mstackrealign option */
2328 static const char ix86_force_align_arg_pointer_string[]
2329 = "force_align_arg_pointer";
2331 static rtx (*ix86_gen_leave) (void);
2332 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2333 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2334 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2335 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2336 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2338 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2339 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2342 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2344 /* Preferred alignment for stack boundary in bits. */
2345 unsigned int ix86_preferred_stack_boundary;
2347 /* Alignment for incoming stack boundary in bits specified at
2348 command line. */
2349 static unsigned int ix86_user_incoming_stack_boundary;
2351 /* Default alignment for incoming stack boundary in bits. */
2352 static unsigned int ix86_default_incoming_stack_boundary;
2354 /* Alignment for incoming stack boundary in bits. */
2355 unsigned int ix86_incoming_stack_boundary;
2357 /* Calling abi specific va_list type nodes. */
2358 static GTY(()) tree sysv_va_list_type_node;
2359 static GTY(()) tree ms_va_list_type_node;
2361 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2362 char internal_label_prefix[16];
2363 int internal_label_prefix_len;
2365 /* Fence to use after loop using movnt. */
2366 tree x86_mfence;
2368 /* Register class used for passing given 64bit part of the argument.
2369 These represent classes as documented by the PS ABI, with the exception
2370 of the SSESF and SSEDF classes, which are basically the SSE class, except
2371 that gcc will use SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2373 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2374 whenever possible (upper half does contain padding). */
2375 enum x86_64_reg_class
2377 X86_64_NO_CLASS,
2378 X86_64_INTEGER_CLASS,
2379 X86_64_INTEGERSI_CLASS,
2380 X86_64_SSE_CLASS,
2381 X86_64_SSESF_CLASS,
2382 X86_64_SSEDF_CLASS,
2383 X86_64_SSEUP_CLASS,
2384 X86_64_X87_CLASS,
2385 X86_64_X87UP_CLASS,
2386 X86_64_COMPLEX_X87_CLASS,
2387 X86_64_MEMORY_CLASS
2390 #define MAX_CLASSES 8
2392 /* Table of constants used by fldpi, fldln2, etc.... */
2393 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2394 static bool ext_80387_constants_init = 0;
2397 static struct machine_function * ix86_init_machine_status (void);
2398 static rtx ix86_function_value (const_tree, const_tree, bool);
2399 static bool ix86_function_value_regno_p (const unsigned int);
2400 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2401 const_tree);
2402 static rtx ix86_static_chain (const_tree, bool);
2403 static int ix86_function_regparm (const_tree, const_tree);
2404 static void ix86_compute_frame_layout (struct ix86_frame *);
2405 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2406 rtx, rtx, int);
2407 static void ix86_add_new_builtins (HOST_WIDE_INT);
2408 static tree ix86_canonical_va_list_type (tree);
2409 static void predict_jump (int);
2410 static unsigned int split_stack_prologue_scratch_regno (void);
2411 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2413 enum ix86_function_specific_strings
2415 IX86_FUNCTION_SPECIFIC_ARCH,
2416 IX86_FUNCTION_SPECIFIC_TUNE,
2417 IX86_FUNCTION_SPECIFIC_MAX
2420 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2421 const char *, enum fpmath_unit, bool);
2422 static void ix86_function_specific_save (struct cl_target_option *,
2423 struct gcc_options *opts);
2424 static void ix86_function_specific_restore (struct gcc_options *opts,
2425 struct cl_target_option *);
2426 static void ix86_function_specific_print (FILE *, int,
2427 struct cl_target_option *);
2428 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2429 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2430 struct gcc_options *,
2431 struct gcc_options *,
2432 struct gcc_options *);
2433 static bool ix86_can_inline_p (tree, tree);
2434 static void ix86_set_current_function (tree);
2435 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2437 static enum calling_abi ix86_function_abi (const_tree);
2440 #ifndef SUBTARGET32_DEFAULT_CPU
2441 #define SUBTARGET32_DEFAULT_CPU "i386"
2442 #endif
2444 /* Whether -mtune= or -march= were specified */
2445 static int ix86_tune_defaulted;
2446 static int ix86_arch_specified;
2448 /* Vectorization library interface and handlers. */
2449 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2451 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2452 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2454 /* Processor target table, indexed by processor number */
2455 struct ptt
2457 const char *const name; /* processor name */
2458 const struct processor_costs *cost; /* Processor costs */
2459 const int align_loop; /* Default alignments. */
2460 const int align_loop_max_skip;
2461 const int align_jump;
2462 const int align_jump_max_skip;
2463 const int align_func;
2466 /* This table must be in sync with enum processor_type in i386.h. */
2467 static const struct ptt processor_target_table[PROCESSOR_max] =
2469 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2470 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2471 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2472 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2473 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2474 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2475 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2476 {"core2", &core_cost, 16, 10, 16, 10, 16},
2477 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2478 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2479 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2480 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2481 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2482 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2483 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2484 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2485 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2486 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2487 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2488 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2489 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2490 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2491 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2492 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2493 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2496 static bool
2497 gate_insert_vzeroupper (void)
2499 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2502 static unsigned int
2503 rest_of_handle_insert_vzeroupper (void)
2505 int i;
2507 /* vzeroupper instructions are inserted immediately after reload to
2508 account for possible spills from 256bit registers. The pass
2509 reuses the mode switching infrastructure by re-running the mode
2510 insertion pass, so disable entities that have already been processed. */
2511 for (i = 0; i < MAX_386_ENTITIES; i++)
2512 ix86_optimize_mode_switching[i] = 0;
2514 ix86_optimize_mode_switching[AVX_U128] = 1;
2516 /* Call optimize_mode_switching. */
2517 g->get_passes ()->execute_pass_mode_switching ();
2518 return 0;
2521 namespace {
2523 const pass_data pass_data_insert_vzeroupper =
2525 RTL_PASS, /* type */
2526 "vzeroupper", /* name */
2527 OPTGROUP_NONE, /* optinfo_flags */
2528 true, /* has_gate */
2529 true, /* has_execute */
2530 TV_NONE, /* tv_id */
2531 0, /* properties_required */
2532 0, /* properties_provided */
2533 0, /* properties_destroyed */
2534 0, /* todo_flags_start */
2535 ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */
2538 class pass_insert_vzeroupper : public rtl_opt_pass
2540 public:
2541 pass_insert_vzeroupper(gcc::context *ctxt)
2542 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2545 /* opt_pass methods: */
2546 bool gate () { return gate_insert_vzeroupper (); }
2547 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2549 }; // class pass_insert_vzeroupper
2551 } // anon namespace
2553 rtl_opt_pass *
2554 make_pass_insert_vzeroupper (gcc::context *ctxt)
2556 return new pass_insert_vzeroupper (ctxt);
2559 /* Return true if a red-zone is in use. */
2561 static inline bool
2562 ix86_using_red_zone (void)
2564 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2567 /* Return a string that documents the current -m options. The caller is
2568 responsible for freeing the string. */
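/* Illustrative only (the exact contents depend on the flags passed in):
   the returned string looks like "-march=core2 -mtune=generic -m64
   -mfpmath=sse", with options separated by spaces and, when ADD_NL_P is
   true, long lines continued with a backslash-newline.  */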
2570 static char *
2571 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2572 const char *tune, enum fpmath_unit fpmath,
2573 bool add_nl_p)
2575 struct ix86_target_opts
2577 const char *option; /* option string */
2578 HOST_WIDE_INT mask; /* isa mask options */
2581 /* This table is ordered so that options like -msse4.2 that imply
2582 preceding options are matched first. */
2583 static struct ix86_target_opts isa_opts[] =
2585 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2586 { "-mfma", OPTION_MASK_ISA_FMA },
2587 { "-mxop", OPTION_MASK_ISA_XOP },
2588 { "-mlwp", OPTION_MASK_ISA_LWP },
2589 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2590 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2591 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2592 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2593 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2594 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2595 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2596 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2597 { "-msse3", OPTION_MASK_ISA_SSE3 },
2598 { "-msse2", OPTION_MASK_ISA_SSE2 },
2599 { "-msse", OPTION_MASK_ISA_SSE },
2600 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2601 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2602 { "-mmmx", OPTION_MASK_ISA_MMX },
2603 { "-mabm", OPTION_MASK_ISA_ABM },
2604 { "-mbmi", OPTION_MASK_ISA_BMI },
2605 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2606 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2607 { "-mhle", OPTION_MASK_ISA_HLE },
2608 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2609 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2610 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2611 { "-madx", OPTION_MASK_ISA_ADX },
2612 { "-mtbm", OPTION_MASK_ISA_TBM },
2613 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2614 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2615 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2616 { "-maes", OPTION_MASK_ISA_AES },
2617 { "-msha", OPTION_MASK_ISA_SHA },
2618 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2619 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2620 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2621 { "-mf16c", OPTION_MASK_ISA_F16C },
2622 { "-mrtm", OPTION_MASK_ISA_RTM },
2623 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2624 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2625 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2628 /* Flag options. */
2629 static struct ix86_target_opts flag_opts[] =
2631 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2632 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2633 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2634 { "-m80387", MASK_80387 },
2635 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2636 { "-malign-double", MASK_ALIGN_DOUBLE },
2637 { "-mcld", MASK_CLD },
2638 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2639 { "-mieee-fp", MASK_IEEE_FP },
2640 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2641 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2642 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2643 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2644 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2645 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2646 { "-mno-red-zone", MASK_NO_RED_ZONE },
2647 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2648 { "-mrecip", MASK_RECIP },
2649 { "-mrtd", MASK_RTD },
2650 { "-msseregparm", MASK_SSEREGPARM },
2651 { "-mstack-arg-probe", MASK_STACK_PROBE },
2652 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2653 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2654 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2655 { "-mvzeroupper", MASK_VZEROUPPER },
2656 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2657 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2658 { "-mprefer-avx128", MASK_PREFER_AVX128},
2661 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2663 char isa_other[40];
2664 char target_other[40];
2665 unsigned num = 0;
2666 unsigned i, j;
2667 char *ret;
2668 char *ptr;
2669 size_t len;
2670 size_t line_len;
2671 size_t sep_len;
2672 const char *abi;
2674 memset (opts, '\0', sizeof (opts));
2676 /* Add -march= option. */
2677 if (arch)
2679 opts[num][0] = "-march=";
2680 opts[num++][1] = arch;
2683 /* Add -mtune= option. */
2684 if (tune)
2686 opts[num][0] = "-mtune=";
2687 opts[num++][1] = tune;
2690 /* Add -m32/-m64/-mx32. */
2691 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2693 if ((isa & OPTION_MASK_ABI_64) != 0)
2694 abi = "-m64";
2695 else
2696 abi = "-mx32";
2697 isa &= ~ (OPTION_MASK_ISA_64BIT
2698 | OPTION_MASK_ABI_64
2699 | OPTION_MASK_ABI_X32);
2701 else
2702 abi = "-m32";
2703 opts[num++][0] = abi;
2705 /* Pick out the options in isa options. */
2706 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2708 if ((isa & isa_opts[i].mask) != 0)
2710 opts[num++][0] = isa_opts[i].option;
2711 isa &= ~ isa_opts[i].mask;
2715 if (isa && add_nl_p)
2717 opts[num++][0] = isa_other;
2718 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2719 isa);
2722 /* Add flag options. */
2723 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2725 if ((flags & flag_opts[i].mask) != 0)
2727 opts[num++][0] = flag_opts[i].option;
2728 flags &= ~ flag_opts[i].mask;
2732 if (flags && add_nl_p)
2734 opts[num++][0] = target_other;
2735 sprintf (target_other, "(other flags: %#x)", flags);
2738 /* Add -fpmath= option. */
2739 if (fpmath)
2741 opts[num][0] = "-mfpmath=";
2742 switch ((int) fpmath)
2744 case FPMATH_387:
2745 opts[num++][1] = "387";
2746 break;
2748 case FPMATH_SSE:
2749 opts[num++][1] = "sse";
2750 break;
2752 case FPMATH_387 | FPMATH_SSE:
2753 opts[num++][1] = "sse+387";
2754 break;
2756 default:
2757 gcc_unreachable ();
2761 /* Any options? */
2762 if (num == 0)
2763 return NULL;
2765 gcc_assert (num < ARRAY_SIZE (opts));
2767 /* Size the string. */
2768 len = 0;
2769 sep_len = (add_nl_p) ? 3 : 1;
2770 for (i = 0; i < num; i++)
2772 len += sep_len;
2773 for (j = 0; j < 2; j++)
2774 if (opts[i][j])
2775 len += strlen (opts[i][j]);
2778 /* Build the string. */
2779 ret = ptr = (char *) xmalloc (len);
2780 line_len = 0;
2782 for (i = 0; i < num; i++)
2784 size_t len2[2];
2786 for (j = 0; j < 2; j++)
2787 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2789 if (i != 0)
2791 *ptr++ = ' ';
2792 line_len++;
2794 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2796 *ptr++ = '\\';
2797 *ptr++ = '\n';
2798 line_len = 0;
2802 for (j = 0; j < 2; j++)
2803 if (opts[i][j])
2805 memcpy (ptr, opts[i][j], len2[j]);
2806 ptr += len2[j];
2807 line_len += len2[j];
2811 *ptr = '\0';
2812 gcc_assert (ret + len >= ptr);
2814 return ret;
2817 /* Return true if profiling code should be emitted before the
2818 prologue, and false otherwise.
2819 Note: for x86 with "hotfix", a sorry () is issued. */
2820 static bool
2821 ix86_profile_before_prologue (void)
2823 return flag_fentry != 0;
2826 /* Function that is callable from the debugger to print the current
2827 options. */
2828 void ATTRIBUTE_UNUSED
2829 ix86_debug_options (void)
2831 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2832 ix86_arch_string, ix86_tune_string,
2833 ix86_fpmath, true);
2835 if (opts)
2837 fprintf (stderr, "%s\n\n", opts);
2838 free (opts);
2840 else
2841 fputs ("<no options>\n\n", stderr);
2843 return;
2846 static const char *stringop_alg_names[] = {
2847 #define DEF_ENUM
2848 #define DEF_ALG(alg, name) #name,
2849 #include "stringop.def"
2850 #undef DEF_ENUM
2851 #undef DEF_ALG
2854 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2855 The string is of the following form (or a comma-separated list of such entries):
2857 strategy_alg:max_size:[align|noalign]
2859 where the full size range for the strategy is either [0, max_size] or
2860 [min_size, max_size], in which min_size is the max_size + 1 of the
2861 preceding range. The last size range must have max_size == -1.
2863 Examples:
2866 -mmemcpy-strategy=libcall:-1:noalign
2868 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2872 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2874 This is to tell the compiler to use the following strategy for memset
2875 1) when the expected size is between [1, 16], use rep_8byte strategy;
2876 2) when the size is between [17, 2048], use vector_loop;
2877 3) when the size is > 2048, use libcall. */
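/* For illustration (a sketch of what the parser below records, with the
   algorithms shown by their command-line names): the memset example above
   yields three stringop_size_range entries, in order:
     { 16,   rep_8byte,    noalign }   sizes [1, 16]
     { 2048, vector_loop,  align   }   sizes [17, 2048]
     { -1,   libcall,      noalign }   sizes above 2048  */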
2879 struct stringop_size_range
2881 int max;
2882 stringop_alg alg;
2883 bool noalign;
2886 static void
2887 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2889 const struct stringop_algs *default_algs;
2890 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2891 char *curr_range_str, *next_range_str;
2892 int i = 0, n = 0;
2894 if (is_memset)
2895 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2896 else
2897 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2899 curr_range_str = strategy_str;
2903 int maxs;
2904 char alg_name[128];
2905 char align[16];
2906 next_range_str = strchr (curr_range_str, ',');
2907 if (next_range_str)
2908 *next_range_str++ = '\0';
2910 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2911 alg_name, &maxs, align))
2913 error ("wrong arg %s to option %s", curr_range_str,
2914 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2915 return;
2918 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2920 error ("size ranges of option %s should be increasing",
2921 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2922 return;
2925 for (i = 0; i < last_alg; i++)
2926 if (!strcmp (alg_name, stringop_alg_names[i]))
2927 break;
2929 if (i == last_alg)
2931 error ("wrong stringop strategy name %s specified for option %s",
2932 alg_name,
2933 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2934 return;
2937 input_ranges[n].max = maxs;
2938 input_ranges[n].alg = (stringop_alg) i;
2939 if (!strcmp (align, "align"))
2940 input_ranges[n].noalign = false;
2941 else if (!strcmp (align, "noalign"))
2942 input_ranges[n].noalign = true;
2943 else
2945 error ("unknown alignment %s specified for option %s",
2946 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2947 return;
2949 n++;
2950 curr_range_str = next_range_str;
2952 while (curr_range_str);
2954 if (input_ranges[n - 1].max != -1)
2956 error ("the max value for the last size range should be -1"
2957 " for option %s",
2958 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2959 return;
2962 if (n > MAX_STRINGOP_ALGS)
2964 error ("too many size ranges specified in option %s",
2965 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2966 return;
2969 /* Now override the default algs array. */
2970 for (i = 0; i < n; i++)
2972 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2973 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2974 = input_ranges[i].alg;
2975 *const_cast<int *>(&default_algs->size[i].noalign)
2976 = input_ranges[i].noalign;
2981 /* Parse the -mtune-ctrl= option. When DUMP is true,
2982 print the features that are explicitly set. */
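/* A hypothetical example (feature names come from x86-tune.def):
   -mtune-ctrl=use_incdec,^partial_reg_stall sets the first feature and
   clears the second; a leading '^' negates the named feature.  */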
2984 static void
2985 parse_mtune_ctrl_str (bool dump)
2987 if (!ix86_tune_ctrl_string)
2988 return;
2990 char *next_feature_string = NULL;
2991 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2992 char *orig = curr_feature_string;
2993 int i;
2996 bool clear = false;
2998 next_feature_string = strchr (curr_feature_string, ',');
2999 if (next_feature_string)
3000 *next_feature_string++ = '\0';
3001 if (*curr_feature_string == '^')
3003 curr_feature_string++;
3004 clear = true;
3006 for (i = 0; i < X86_TUNE_LAST; i++)
3008 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3010 ix86_tune_features[i] = !clear;
3011 if (dump)
3012 fprintf (stderr, "Explicitly %s feature %s\n",
3013 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3014 break;
3017 if (i == X86_TUNE_LAST)
3018 error ("Unknown parameter to option -mtune-ctrl: %s",
3019 clear ? curr_feature_string - 1 : curr_feature_string);
3020 curr_feature_string = next_feature_string;
3022 while (curr_feature_string);
3023 free (orig);
3026 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3027 processor type. */
3029 static void
3030 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3032 unsigned int ix86_tune_mask = 1u << ix86_tune;
3033 int i;
3035 for (i = 0; i < X86_TUNE_LAST; ++i)
3037 if (ix86_tune_no_default)
3038 ix86_tune_features[i] = 0;
3039 else
3040 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3043 if (dump)
3045 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3046 for (i = 0; i < X86_TUNE_LAST; i++)
3047 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3048 ix86_tune_features[i] ? "on" : "off");
3051 parse_mtune_ctrl_str (dump);
3055 /* Override various settings based on options. If MAIN_ARGS_P, the
3056 options are from the command line, otherwise they are from
3057 attributes. */
3059 static void
3060 ix86_option_override_internal (bool main_args_p,
3061 struct gcc_options *opts,
3062 struct gcc_options *opts_set)
3064 int i;
3065 unsigned int ix86_arch_mask;
3066 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3067 const char *prefix;
3068 const char *suffix;
3069 const char *sw;
3071 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3072 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3073 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3074 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3075 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3076 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3077 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3078 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3079 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3080 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3081 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3082 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3083 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3084 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3085 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3086 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3087 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3088 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3089 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3090 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3091 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3092 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3093 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3094 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3095 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3096 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3097 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3098 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3099 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3100 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3101 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3102 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3103 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3104 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3105 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3106 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3107 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3108 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3109 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3110 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3111 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3112 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3113 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3114 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3115 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3116 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3118 #define PTA_CORE2 \
3119 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3120 | PTA_CX16 | PTA_FXSR)
3121 #define PTA_NEHALEM \
3122 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3123 #define PTA_WESTMERE \
3124 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3125 #define PTA_SANDYBRIDGE \
3126 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3127 #define PTA_IVYBRIDGE \
3128 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3129 #define PTA_HASWELL \
3130 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3131 | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE)
3132 #define PTA_BROADWELL \
3133 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3134 #define PTA_BONNELL \
3135 (PTA_CORE2 | PTA_MOVBE)
3136 #define PTA_SILVERMONT \
3137 (PTA_WESTMERE | PTA_MOVBE)
3139 /* If this reaches 64, the flags field in struct pta below needs to be widened. */
3141 static struct pta
3143 const char *const name; /* processor name or nickname. */
3144 const enum processor_type processor;
3145 const enum attr_cpu schedule;
3146 const unsigned HOST_WIDE_INT flags;
3148 const processor_alias_table[] =
3150 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3151 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3152 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3153 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3154 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3155 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3156 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3157 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3158 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3159 PTA_MMX | PTA_SSE | PTA_FXSR},
3160 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3161 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3162 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3163 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3164 PTA_MMX | PTA_SSE | PTA_FXSR},
3165 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3166 PTA_MMX | PTA_SSE | PTA_FXSR},
3167 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3168 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3169 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3170 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3171 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3172 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3173 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3174 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3175 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3176 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3177 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3178 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3179 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3180 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3181 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3182 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3183 PTA_SANDYBRIDGE},
3184 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3185 PTA_SANDYBRIDGE},
3186 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3187 PTA_IVYBRIDGE},
3188 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3189 PTA_IVYBRIDGE},
3190 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3191 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3192 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3193 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3194 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3195 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3196 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3197 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3198 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3199 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3200 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3201 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3202 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3203 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3204 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3205 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3206 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3207 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3208 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3209 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3210 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3211 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3212 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3213 {"x86-64", PROCESSOR_K8, CPU_K8,
3214 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3215 {"k8", PROCESSOR_K8, CPU_K8,
3216 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3217 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3218 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3219 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3220 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3221 {"opteron", PROCESSOR_K8, CPU_K8,
3222 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3223 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3224 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3225 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3226 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3227 {"athlon64", PROCESSOR_K8, CPU_K8,
3228 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3229 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3230 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3231 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3232 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3233 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3234 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3235 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3236 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3237 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3238 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3239 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3240 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3241 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3242 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3243 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3244 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3245 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3246 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3247 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3248 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3249 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3250 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3251 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3252 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3253 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3254 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3255 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3256 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3257 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3258 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3259 | PTA_XSAVEOPT | PTA_FSGSBASE},
3260 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3261 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3262 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3263 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3264 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3265 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3266 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
3267 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3268 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3269 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3270 | PTA_FXSR | PTA_XSAVE},
3271 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3272 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3273 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3274 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3275 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3276 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3278 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3279 PTA_64BIT
3280 | PTA_HLE /* These flags are only used for the -march switch. */ },
3283 /* -mrecip options. */
3284 static struct
3286 const char *string; /* option name */
3287 unsigned int mask; /* mask bits to set */
3289 const recip_options[] =
3291 { "all", RECIP_MASK_ALL },
3292 { "none", RECIP_MASK_NONE },
3293 { "div", RECIP_MASK_DIV },
3294 { "sqrt", RECIP_MASK_SQRT },
3295 { "vec-div", RECIP_MASK_VEC_DIV },
3296 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3299 int const pta_size = ARRAY_SIZE (processor_alias_table);
3301 /* Set up prefix/suffix so the error messages refer to either the command
3302 line argument, or the attribute(target). */
3303 if (main_args_p)
3305 prefix = "-m";
3306 suffix = "";
3307 sw = "switch";
3309 else
3311 prefix = "option(\"";
3312 suffix = "\")";
3313 sw = "attribute";
3316 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3317 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3318 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3319 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3320 #ifdef TARGET_BI_ARCH
3321 else
3323 #if TARGET_BI_ARCH == 1
3324 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3325 is on and OPTION_MASK_ABI_X32 is off. We turn off
3326 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3327 -mx32. */
3328 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3329 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3330 #else
3331 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3332 on and OPTION_MASK_ABI_64 is off. We turn off
3333 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3334 -m64. */
3335 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3336 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3337 #endif
3339 #endif
3341 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3343 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3344 OPTION_MASK_ABI_64 for TARGET_X32. */
3345 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3346 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3348 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3349 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3350 | OPTION_MASK_ABI_X32
3351 | OPTION_MASK_ABI_64);
3352 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3354 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3355 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3356 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3357 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
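/* To summarize the ABI selection above: -m64 gives the LP64 ABI (64-bit
   pointers and longs), -mx32 gives the ILP32 ABI on top of the 64-bit ISA,
   and -m32/-m16 give the plain 32-bit modes; after this point at most one
   of OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 remains set. */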
3360 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3361 SUBTARGET_OVERRIDE_OPTIONS;
3362 #endif
3364 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3365 SUBSUBTARGET_OVERRIDE_OPTIONS;
3366 #endif
3368 /* -fPIC is the default for x86_64. */
3369 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3370 opts->x_flag_pic = 2;
3372 /* Need to check -mtune=generic first. */
3373 if (opts->x_ix86_tune_string)
3375 /* As special support for cross compilers we read -mtune=native
3376 as -mtune=generic. With native compilers we won't see the
3377 -mtune=native, as it was changed by the driver. */
3378 if (!strcmp (opts->x_ix86_tune_string, "native"))
3380 opts->x_ix86_tune_string = "generic";
3382 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3383 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3384 "%stune=k8%s or %stune=generic%s instead as appropriate",
3385 prefix, suffix, prefix, suffix, prefix, suffix);
3387 else
3389 if (opts->x_ix86_arch_string)
3390 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3391 if (!opts->x_ix86_tune_string)
3393 opts->x_ix86_tune_string
3394 = processor_target_table[TARGET_CPU_DEFAULT].name;
3395 ix86_tune_defaulted = 1;
3398 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3399 or defaulted. We need to use a sensible tune option. */
3400 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3402 opts->x_ix86_tune_string = "generic";
3406 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3407 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3409 /* rep; movq isn't available in 32-bit code. */
3410 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3411 opts->x_ix86_stringop_alg = no_stringop;
3414 if (!opts->x_ix86_arch_string)
3415 opts->x_ix86_arch_string
3416 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3417 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3418 else
3419 ix86_arch_specified = 1;
3421 if (opts_set->x_ix86_pmode)
3423 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3424 && opts->x_ix86_pmode == PMODE_SI)
3425 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3426 && opts->x_ix86_pmode == PMODE_DI))
3427 error ("address mode %qs not supported in the %s bit mode",
3428 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3429 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3431 else
3432 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3433 ? PMODE_DI : PMODE_SI;
3435 if (!opts_set->x_ix86_abi)
3436 opts->x_ix86_abi = DEFAULT_ABI;
3438 /* For targets using the MS ABI, enable MS extensions unless they are
3439 explicitly turned off. For non-MS ABIs we turn this option off. */
3441 if (!opts_set->x_flag_ms_extensions)
3442 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3444 if (opts_set->x_ix86_cmodel)
3446 switch (opts->x_ix86_cmodel)
3448 case CM_SMALL:
3449 case CM_SMALL_PIC:
3450 if (opts->x_flag_pic)
3451 opts->x_ix86_cmodel = CM_SMALL_PIC;
3452 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3453 error ("code model %qs not supported in the %s bit mode",
3454 "small", "32");
3455 break;
3457 case CM_MEDIUM:
3458 case CM_MEDIUM_PIC:
3459 if (opts->x_flag_pic)
3460 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3461 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3462 error ("code model %qs not supported in the %s bit mode",
3463 "medium", "32");
3464 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3465 error ("code model %qs not supported in x32 mode",
3466 "medium");
3467 break;
3469 case CM_LARGE:
3470 case CM_LARGE_PIC:
3471 if (opts->x_flag_pic)
3472 opts->x_ix86_cmodel = CM_LARGE_PIC;
3473 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3474 error ("code model %qs not supported in the %s bit mode",
3475 "large", "32");
3476 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3477 error ("code model %qs not supported in x32 mode",
3478 "large");
3479 break;
3481 case CM_32:
3482 if (opts->x_flag_pic)
3483 error ("code model %s does not support PIC mode", "32");
3484 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3485 error ("code model %qs not supported in the %s bit mode",
3486 "32", "64");
3487 break;
3489 case CM_KERNEL:
3490 if (opts->x_flag_pic)
3492 error ("code model %s does not support PIC mode", "kernel");
3493 opts->x_ix86_cmodel = CM_32;
3495 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3496 error ("code model %qs not supported in the %s bit mode",
3497 "kernel", "32");
3498 break;
3500 default:
3501 gcc_unreachable ();
3504 else
3506 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3507 use of rip-relative addressing. This eliminates fixups that
3508 would otherwise be needed if this object is to be placed in a
3509 DLL, and is essentially just as efficient as direct addressing. */
3510 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3511 && (TARGET_RDOS || TARGET_PECOFF))
3512 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3513 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3514 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3515 else
3516 opts->x_ix86_cmodel = CM_32;
3518 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3520 error ("-masm=intel not supported in this configuration");
3521 opts->x_ix86_asm_dialect = ASM_ATT;
3523 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3524 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3525 sorry ("%i-bit mode not compiled in",
3526 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3528 for (i = 0; i < pta_size; i++)
3529 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3531 ix86_schedule = processor_alias_table[i].schedule;
3532 ix86_arch = processor_alias_table[i].processor;
3533 /* Default cpu tuning to the architecture. */
3534 ix86_tune = ix86_arch;
3536 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3537 && !(processor_alias_table[i].flags & PTA_64BIT))
3538 error ("CPU you selected does not support x86-64 "
3539 "instruction set");
3541 if (processor_alias_table[i].flags & PTA_MMX
3542 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3543 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3544 if (processor_alias_table[i].flags & PTA_3DNOW
3545 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3546 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3547 if (processor_alias_table[i].flags & PTA_3DNOW_A
3548 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3549 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3550 if (processor_alias_table[i].flags & PTA_SSE
3551 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3552 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3553 if (processor_alias_table[i].flags & PTA_SSE2
3554 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3555 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3556 if (processor_alias_table[i].flags & PTA_SSE3
3557 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3558 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3559 if (processor_alias_table[i].flags & PTA_SSSE3
3560 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3561 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3562 if (processor_alias_table[i].flags & PTA_SSE4_1
3563 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3564 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3565 if (processor_alias_table[i].flags & PTA_SSE4_2
3566 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3567 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3568 if (processor_alias_table[i].flags & PTA_AVX
3569 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3570 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3571 if (processor_alias_table[i].flags & PTA_AVX2
3572 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3573 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3574 if (processor_alias_table[i].flags & PTA_FMA
3575 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3576 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3577 if (processor_alias_table[i].flags & PTA_SSE4A
3578 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3579 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3580 if (processor_alias_table[i].flags & PTA_FMA4
3581 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3582 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3583 if (processor_alias_table[i].flags & PTA_XOP
3584 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3585 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3586 if (processor_alias_table[i].flags & PTA_LWP
3587 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3588 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3589 if (processor_alias_table[i].flags & PTA_ABM
3590 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3591 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3592 if (processor_alias_table[i].flags & PTA_BMI
3593 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3594 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3595 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3596 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3597 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3598 if (processor_alias_table[i].flags & PTA_TBM
3599 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3600 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3601 if (processor_alias_table[i].flags & PTA_BMI2
3602 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3603 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3604 if (processor_alias_table[i].flags & PTA_CX16
3605 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3606 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3607 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3608 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3610 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3611 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3614 if (processor_alias_table[i].flags & PTA_MOVBE
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3617 if (processor_alias_table[i].flags & PTA_AES
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3620 if (processor_alias_table[i].flags & PTA_SHA
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3623 if (processor_alias_table[i].flags & PTA_PCLMUL
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3626 if (processor_alias_table[i].flags & PTA_FSGSBASE
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3629 if (processor_alias_table[i].flags & PTA_RDRND
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3632 if (processor_alias_table[i].flags & PTA_F16C
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3635 if (processor_alias_table[i].flags & PTA_RTM
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3638 if (processor_alias_table[i].flags & PTA_HLE
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3641 if (processor_alias_table[i].flags & PTA_PRFCHW
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3644 if (processor_alias_table[i].flags & PTA_RDSEED
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3647 if (processor_alias_table[i].flags & PTA_ADX
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3650 if (processor_alias_table[i].flags & PTA_FXSR
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3653 if (processor_alias_table[i].flags & PTA_XSAVE
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3656 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3659 if (processor_alias_table[i].flags & PTA_AVX512F
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3662 if (processor_alias_table[i].flags & PTA_AVX512ER
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3665 if (processor_alias_table[i].flags & PTA_AVX512PF
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3668 if (processor_alias_table[i].flags & PTA_AVX512CD
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3671 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3674 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3675 x86_prefetch_sse = true;
3677 break;
3680 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3681 error ("generic CPU can be used only for %stune=%s %s",
3682 prefix, suffix, sw);
3683 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3684 error ("intel CPU can be used only for %stune=%s %s",
3685 prefix, suffix, sw);
3686 else if (i == pta_size)
3687 error ("bad value (%s) for %sarch=%s %s",
3688 opts->x_ix86_arch_string, prefix, suffix, sw);
3690 ix86_arch_mask = 1u << ix86_arch;
3691 for (i = 0; i < X86_ARCH_LAST; ++i)
3692 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3694 for (i = 0; i < pta_size; i++)
3695 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3697 ix86_schedule = processor_alias_table[i].schedule;
3698 ix86_tune = processor_alias_table[i].processor;
3699 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3701 if (!(processor_alias_table[i].flags & PTA_64BIT))
3703 if (ix86_tune_defaulted)
3705 opts->x_ix86_tune_string = "x86-64";
3706 for (i = 0; i < pta_size; i++)
3707 if (! strcmp (opts->x_ix86_tune_string,
3708 processor_alias_table[i].name))
3709 break;
3710 ix86_schedule = processor_alias_table[i].schedule;
3711 ix86_tune = processor_alias_table[i].processor;
3713 else
3714 error ("CPU you selected does not support x86-64 "
3715 "instruction set");
3718 /* Intel CPUs have always interpreted SSE prefetch instructions as
3719 NOPs; so, we can enable SSE prefetch instructions even when
3720 -mtune (rather than -march) points us to a processor that has them.
3721 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3722 higher processors. */
3723 if (TARGET_CMOV
3724 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3725 x86_prefetch_sse = true;
3726 break;
3729 if (ix86_tune_specified && i == pta_size)
3730 error ("bad value (%s) for %stune=%s %s",
3731 opts->x_ix86_tune_string, prefix, suffix, sw);
3733 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3735 #ifndef USE_IX86_FRAME_POINTER
3736 #define USE_IX86_FRAME_POINTER 0
3737 #endif
3739 #ifndef USE_X86_64_FRAME_POINTER
3740 #define USE_X86_64_FRAME_POINTER 0
3741 #endif
3743 /* Set the default values for switches whose default depends on TARGET_64BIT
3744 in case they weren't overwritten by command line options. */
3745 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3747 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3748 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3749 if (opts->x_flag_asynchronous_unwind_tables
3750 && !opts_set->x_flag_unwind_tables
3751 && TARGET_64BIT_MS_ABI)
3752 opts->x_flag_unwind_tables = 1;
3753 if (opts->x_flag_asynchronous_unwind_tables == 2)
3754 opts->x_flag_unwind_tables
3755 = opts->x_flag_asynchronous_unwind_tables = 1;
3756 if (opts->x_flag_pcc_struct_return == 2)
3757 opts->x_flag_pcc_struct_return = 0;
3759 else
3761 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3762 opts->x_flag_omit_frame_pointer
3763 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3764 if (opts->x_flag_asynchronous_unwind_tables == 2)
3765 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3766 if (opts->x_flag_pcc_struct_return == 2)
3767 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3770 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3771 if (opts->x_optimize_size)
3772 ix86_cost = &ix86_size_cost;
3773 else
3774 ix86_cost = ix86_tune_cost;
3776 /* Arrange to set up i386_stack_locals for all functions. */
3777 init_machine_status = ix86_init_machine_status;
3779 /* Validate -mregparm= value. */
3780 if (opts_set->x_ix86_regparm)
3782 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3783 warning (0, "-mregparm is ignored in 64-bit mode");
3784 if (opts->x_ix86_regparm > REGPARM_MAX)
3786 error ("-mregparm=%d is not between 0 and %d",
3787 opts->x_ix86_regparm, REGPARM_MAX);
3788 opts->x_ix86_regparm = 0;
3791 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3792 opts->x_ix86_regparm = REGPARM_MAX;
3794 /* Default align_* from the processor table. */
3795 if (opts->x_align_loops == 0)
3797 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3798 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3800 if (opts->x_align_jumps == 0)
3802 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3803 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3805 if (opts->x_align_functions == 0)
3807 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3810 /* Provide default for -mbranch-cost= value. */
3811 if (!opts_set->x_ix86_branch_cost)
3812 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3814 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3816 opts->x_target_flags
3817 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3819 /* Enable by default the SSE and MMX builtins. Do allow the user to
3820 explicitly disable any of these. In particular, disabling SSE and
3821 MMX for kernel code is extremely useful. */
3822 if (!ix86_arch_specified)
3823 opts->x_ix86_isa_flags
3824 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3825 | TARGET_SUBTARGET64_ISA_DEFAULT)
3826 & ~opts->x_ix86_isa_flags_explicit);
3828 if (TARGET_RTD_P (opts->x_target_flags))
3829 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3831 else
3833 opts->x_target_flags
3834 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3836 if (!ix86_arch_specified)
3837 opts->x_ix86_isa_flags
3838 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3840 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3841 when the programmer takes care to keep the stack from being destroyed. */
3842 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3843 opts->x_target_flags |= MASK_NO_RED_ZONE;
3846 /* Keep nonleaf frame pointers. */
3847 if (opts->x_flag_omit_frame_pointer)
3848 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3849 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3850 opts->x_flag_omit_frame_pointer = 1;
3852 /* If we're doing fast math, we don't care about comparison order
3853 wrt NaNs. This lets us use a shorter comparison sequence. */
3854 if (opts->x_flag_finite_math_only)
3855 opts->x_target_flags &= ~MASK_IEEE_FP;
3857 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3858 since the insns won't need emulation. */
3859 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3860 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3862 /* Likewise, if the target doesn't have a 387, or we've specified
3863 software floating point, don't use 387 inline intrinsics. */
3864 if (!TARGET_80387_P (opts->x_target_flags))
3865 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3867 /* Turn on MMX builtins for -msse. */
3868 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3869 opts->x_ix86_isa_flags
3870 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3872 /* Enable SSE prefetch. */
3873 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3874 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3875 x86_prefetch_sse = true;
3877 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3878 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3879 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3880 opts->x_ix86_isa_flags
3881 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3883 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3884 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3885 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3886 opts->x_ix86_isa_flags
3887 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3889 /* Enable lzcnt instruction for -mabm. */
3890 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3891 opts->x_ix86_isa_flags
3892 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3894 /* Validate -mpreferred-stack-boundary= value or default it to
3895 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3896 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3897 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3899 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3900 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3901 int max = (TARGET_SEH ? 4 : 12);
3903 if (opts->x_ix86_preferred_stack_boundary_arg < min
3904 || opts->x_ix86_preferred_stack_boundary_arg > max)
3906 if (min == max)
3907 error ("-mpreferred-stack-boundary is not supported "
3908 "for this target");
3909 else
3910 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3911 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3913 else
3914 ix86_preferred_stack_boundary
3915 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
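/* The option argument is the log2 of the boundary in bytes, so for example
   -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT = 128 bits,
   i.e. a 16-byte preferred stack alignment. */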
3918 /* Set the default value for -mstackrealign. */
3919 if (opts->x_ix86_force_align_arg_pointer == -1)
3920 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3922 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3924 /* Validate -mincoming-stack-boundary= value or default it to
3925 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3926 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3927 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3929 if (opts->x_ix86_incoming_stack_boundary_arg
3930 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3931 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3932 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3933 opts->x_ix86_incoming_stack_boundary_arg,
3934 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3935 else
3937 ix86_user_incoming_stack_boundary
3938 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3939 ix86_incoming_stack_boundary
3940 = ix86_user_incoming_stack_boundary;
3944 /* Accept -msseregparm only if at least SSE support is enabled. */
3945 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3946 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3947 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3949 if (opts_set->x_ix86_fpmath)
3951 if (opts->x_ix86_fpmath & FPMATH_SSE)
3953 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3955 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3956 opts->x_ix86_fpmath = FPMATH_387;
3958 else if ((opts->x_ix86_fpmath & FPMATH_387)
3959 && !TARGET_80387_P (opts->x_target_flags))
3961 warning (0, "387 instruction set disabled, using SSE arithmetics");
3962 opts->x_ix86_fpmath = FPMATH_SSE;
3966 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3967 -mfpmath=387. The latter is nevertheless the default on many targets,
3968 since the extra 80-bit precision of temporaries is considered part of
3969 the ABI. Override the default at least for -ffast-math.
3970 TODO: -mfpmath=both seems to produce equally fast code with slightly
3971 smaller binaries, but it is not clear whether register allocation is
3972 ready for this setting.
3973 Also, -mfpmath=387 codegen is overall quite a bit more compact (about
3974 4-5%) than SSE codegen. We may switch to 387 with -ffast-math for
3975 size-optimized functions. */
3976 else if (fast_math_flags_set_p (&global_options)
3977 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3978 opts->x_ix86_fpmath = FPMATH_SSE;
3979 else
3980 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
3982 /* If the i387 is disabled, then do not return values in it. */
3983 if (!TARGET_80387_P (opts->x_target_flags))
3984 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
3986 /* Use an external vectorized math library (SVML or ACML) when vectorizing calls to math functions. */
3987 if (opts_set->x_ix86_veclibabi_type)
3988 switch (opts->x_ix86_veclibabi_type)
3990 case ix86_veclibabi_type_svml:
3991 ix86_veclib_handler = ix86_veclibabi_svml;
3992 break;
3994 case ix86_veclibabi_type_acml:
3995 ix86_veclib_handler = ix86_veclibabi_acml;
3996 break;
3998 default:
3999 gcc_unreachable ();
4002 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4003 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4004 && !opts->x_optimize_size)
4005 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4007 /* If stack probes are required, the space used for large function
4008 arguments on the stack must also be probed, so enable
4009 -maccumulate-outgoing-args so this happens in the prologue. */
4010 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4011 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4013 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4014 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4015 "for correctness", prefix, suffix);
4016 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4019 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4021 char *p;
4022 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4023 p = strchr (internal_label_prefix, 'X');
4024 internal_label_prefix_len = p - internal_label_prefix;
4025 *p = '\0';
4028 /* When no scheduling description is available, disable the scheduler pass
4029 so it won't slow down compilation and make x87 code slower. */
4030 if (!TARGET_SCHEDULE)
4031 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4033 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4034 ix86_tune_cost->simultaneous_prefetches,
4035 opts->x_param_values,
4036 opts_set->x_param_values);
4037 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4038 ix86_tune_cost->prefetch_block,
4039 opts->x_param_values,
4040 opts_set->x_param_values);
4041 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4042 ix86_tune_cost->l1_cache_size,
4043 opts->x_param_values,
4044 opts_set->x_param_values);
4045 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4046 ix86_tune_cost->l2_cache_size,
4047 opts->x_param_values,
4048 opts_set->x_param_values);
4050 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4051 if (opts->x_flag_prefetch_loop_arrays < 0
4052 && HAVE_prefetch
4053 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4054 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4055 opts->x_flag_prefetch_loop_arrays = 1;
4057 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4058 can be optimized to ap = __builtin_next_arg (0). */
4059 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4060 targetm.expand_builtin_va_start = NULL;
4062 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4064 ix86_gen_leave = gen_leave_rex64;
4065 if (Pmode == DImode)
4067 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4068 ix86_gen_tls_local_dynamic_base_64
4069 = gen_tls_local_dynamic_base_64_di;
4071 else
4073 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4074 ix86_gen_tls_local_dynamic_base_64
4075 = gen_tls_local_dynamic_base_64_si;
4078 else
4079 ix86_gen_leave = gen_leave;
4081 if (Pmode == DImode)
4083 ix86_gen_add3 = gen_adddi3;
4084 ix86_gen_sub3 = gen_subdi3;
4085 ix86_gen_sub3_carry = gen_subdi3_carry;
4086 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4087 ix86_gen_andsp = gen_anddi3;
4088 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4089 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4090 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4091 ix86_gen_monitor = gen_sse3_monitor_di;
4093 else
4095 ix86_gen_add3 = gen_addsi3;
4096 ix86_gen_sub3 = gen_subsi3;
4097 ix86_gen_sub3_carry = gen_subsi3_carry;
4098 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4099 ix86_gen_andsp = gen_andsi3;
4100 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4101 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4102 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4103 ix86_gen_monitor = gen_sse3_monitor_si;
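/* Note that the choice above is keyed on Pmode rather than TARGET_64BIT:
   with -mx32 we are in 64-bit mode but pointers (and thus Pmode) are
   32-bit, so the SImode generators are the right ones there. */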
4106 #ifdef USE_IX86_CLD
4107 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4108 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4109 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4110 #endif
4112 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4114 if (opts->x_flag_fentry > 0)
4115 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4116 "with -fpic");
4117 opts->x_flag_fentry = 0;
4119 else if (TARGET_SEH)
4121 if (opts->x_flag_fentry == 0)
4122 sorry ("-mno-fentry isn%'t compatible with SEH");
4123 opts->x_flag_fentry = 1;
4125 else if (opts->x_flag_fentry < 0)
4127 #if defined(PROFILE_BEFORE_PROLOGUE)
4128 opts->x_flag_fentry = 1;
4129 #else
4130 opts->x_flag_fentry = 0;
4131 #endif
4134 /* When not optimizing for size, enable the vzeroupper optimization for
4135 TARGET_AVX with -fexpensive-optimizations, and split 32-byte
4136 AVX unaligned loads/stores. */
4137 if (!opts->x_optimize_size)
4139 if (flag_expensive_optimizations
4140 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4141 opts->x_target_flags |= MASK_VZEROUPPER;
4142 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4143 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4144 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4145 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4146 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4147 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4148 /* Enable 128-bit AVX instruction generation
4149 for the auto-vectorizer. */
4150 if (TARGET_AVX128_OPTIMAL
4151 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4152 opts->x_target_flags |= MASK_PREFER_AVX128;
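/* Preferring 128-bit AVX here is a tuning choice for processors on which
   256-bit vector operations are no faster than (or are internally split
   into) two 128-bit operations; X86_TUNE_AVX128_OPTIMAL is set for such
   processors. */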
4155 if (opts->x_ix86_recip_name)
4157 char *p = ASTRDUP (opts->x_ix86_recip_name);
4158 char *q;
4159 unsigned int mask, i;
4160 bool invert;
4162 while ((q = strtok (p, ",")) != NULL)
4164 p = NULL;
4165 if (*q == '!')
4167 invert = true;
4168 q++;
4170 else
4171 invert = false;
4173 if (!strcmp (q, "default"))
4174 mask = RECIP_MASK_ALL;
4175 else
4177 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4178 if (!strcmp (q, recip_options[i].string))
4180 mask = recip_options[i].mask;
4181 break;
4184 if (i == ARRAY_SIZE (recip_options))
4186 error ("unknown option for -mrecip=%s", q);
4187 invert = false;
4188 mask = RECIP_MASK_NONE;
4192 opts->x_recip_mask_explicit |= mask;
4193 if (invert)
4194 opts->x_recip_mask &= ~mask;
4195 else
4196 opts->x_recip_mask |= mask;
4200 if (TARGET_RECIP_P (opts->x_target_flags))
4201 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4202 else if (opts_set->x_target_flags & MASK_RECIP)
4203 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
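/* For example, -mrecip=all,!sqrt enables every reciprocal approximation
   except the scalar square-root ones, while plain -mrecip (MASK_RECIP)
   enables everything that was not explicitly listed; the accepted keywords
   are "default" and the entries of recip_options above. */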
4205 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4206 for 64-bit Bionic. */
4207 if (TARGET_HAS_BIONIC
4208 && !(opts_set->x_target_flags
4209 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4210 opts->x_target_flags |= (TARGET_64BIT
4211 ? MASK_LONG_DOUBLE_128
4212 : MASK_LONG_DOUBLE_64);
4214 /* Only one of them can be active. */
4215 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4216 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4218 /* Save the initial options in case the user uses function-specific
4219 options. */
4220 if (main_args_p)
4221 target_option_default_node = target_option_current_node
4222 = build_target_option_node (opts);
4224 /* Handle the stack protector. */
4225 if (!opts_set->x_ix86_stack_protector_guard)
4226 opts->x_ix86_stack_protector_guard
4227 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
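/* SSP_TLS reads the stack-protector canary from a thread-local slot
   (e.g. %gs:20 in 32-bit and %fs:40 in 64-bit glibc configurations),
   whereas SSP_GLOBAL loads it from the global __stack_chk_guard symbol,
   which is what Bionic provides; the offsets are given as typical examples
   rather than a guarantee for every target. */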
4229 /* Handle -mmemcpy-strategy= and -mmemset-strategy=. */
4230 if (opts->x_ix86_tune_memcpy_strategy)
4232 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4233 ix86_parse_stringop_strategy_string (str, false);
4234 free (str);
4237 if (opts->x_ix86_tune_memset_strategy)
4239 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4240 ix86_parse_stringop_strategy_string (str, true);
4241 free (str);
4245 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4247 static void
4248 ix86_option_override (void)
4250 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4251 static struct register_pass_info insert_vzeroupper_info
4252 = { pass_insert_vzeroupper, "reload",
4253 1, PASS_POS_INSERT_AFTER
4256 ix86_option_override_internal (true, &global_options, &global_options_set);
4259 /* This needs to be done at startup. It's convenient to do it here. */
4260 register_pass (&insert_vzeroupper_info);
4263 /* Update register usage after having seen the compiler flags. */
4265 static void
4266 ix86_conditional_register_usage (void)
4268 int i, c_mask;
4269 unsigned int j;
4271 /* The PIC register, if it exists, is fixed. */
4272 j = PIC_OFFSET_TABLE_REGNUM;
4273 if (j != INVALID_REGNUM)
4274 fixed_regs[j] = call_used_regs[j] = 1;
4276 /* For 32-bit targets, squash the REX registers. */
4277 if (! TARGET_64BIT)
4279 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4280 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4281 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4282 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4283 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4284 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4287 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4288 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4289 : TARGET_64BIT ? (1 << 2)
4290 : (1 << 1));
4292 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4294 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4296 /* Set/reset conditionally defined registers from
4297 CALL_USED_REGISTERS initializer. */
4298 if (call_used_regs[i] > 1)
4299 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4301 /* Compute the CLOBBERED_REGS register set as the call-used registers
4302 from the GENERAL_REGS register set. */
4303 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4304 && call_used_regs[i])
4305 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4308 /* If MMX is disabled, squash the registers. */
4309 if (! TARGET_MMX)
4310 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4311 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4312 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4314 /* If SSE is disabled, squash the registers. */
4315 if (! TARGET_SSE)
4316 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4317 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4318 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4320 /* If the FPU is disabled, squash the registers. */
4321 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4322 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4323 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4324 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4326 /* If AVX512F is disabled, squash the registers. */
4327 if (! TARGET_AVX512F)
4329 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4330 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4332 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4333 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4338 /* Save the current options. */
4340 static void
4341 ix86_function_specific_save (struct cl_target_option *ptr,
4342 struct gcc_options *opts)
4344 ptr->arch = ix86_arch;
4345 ptr->schedule = ix86_schedule;
4346 ptr->tune = ix86_tune;
4347 ptr->branch_cost = ix86_branch_cost;
4348 ptr->tune_defaulted = ix86_tune_defaulted;
4349 ptr->arch_specified = ix86_arch_specified;
4350 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4351 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4352 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4353 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4354 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4355 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4356 ptr->x_ix86_abi = opts->x_ix86_abi;
4357 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4358 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4359 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4360 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4361 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4362 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4363 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4364 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4365 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4366 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4367 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4368 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4369 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4370 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4371 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4372 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4373 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4374 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4375 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4376 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4378 /* The fields are char but the variables are not; make sure the
4379 values fit in the fields. */
4380 gcc_assert (ptr->arch == ix86_arch);
4381 gcc_assert (ptr->schedule == ix86_schedule);
4382 gcc_assert (ptr->tune == ix86_tune);
4383 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4386 /* Restore the current options. */
4388 static void
4389 ix86_function_specific_restore (struct gcc_options *opts,
4390 struct cl_target_option *ptr)
4392 enum processor_type old_tune = ix86_tune;
4393 enum processor_type old_arch = ix86_arch;
4394 unsigned int ix86_arch_mask;
4395 int i;
4397 /* We don't change -fPIC. */
4398 opts->x_flag_pic = flag_pic;
4400 ix86_arch = (enum processor_type) ptr->arch;
4401 ix86_schedule = (enum attr_cpu) ptr->schedule;
4402 ix86_tune = (enum processor_type) ptr->tune;
4403 opts->x_ix86_branch_cost = ptr->branch_cost;
4404 ix86_tune_defaulted = ptr->tune_defaulted;
4405 ix86_arch_specified = ptr->arch_specified;
4406 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4407 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4408 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4409 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4410 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4411 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4412 opts->x_ix86_abi = ptr->x_ix86_abi;
4413 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4414 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4415 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4416 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4417 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4418 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4419 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4420 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4421 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4422 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4423 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4424 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4425 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4426 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4427 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4428 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4429 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4430 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4431 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4432 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4434 /* Recreate the arch feature tests if the arch changed. */
4435 if (old_arch != ix86_arch)
4437 ix86_arch_mask = 1u << ix86_arch;
4438 for (i = 0; i < X86_ARCH_LAST; ++i)
4439 ix86_arch_features[i]
4440 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4443 /* Recreate the tune optimization tests. */
4444 if (old_tune != ix86_tune)
4445 set_ix86_tune_features (ix86_tune, false);
4448 /* Print the current options. */
4450 static void
4451 ix86_function_specific_print (FILE *file, int indent,
4452 struct cl_target_option *ptr)
4454 char *target_string
4455 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4456 NULL, NULL, ptr->x_ix86_fpmath, false);
4458 gcc_assert (ptr->arch < PROCESSOR_max);
4459 fprintf (file, "%*sarch = %d (%s)\n",
4460 indent, "",
4461 ptr->arch, processor_target_table[ptr->arch].name);
4463 gcc_assert (ptr->tune < PROCESSOR_max);
4464 fprintf (file, "%*stune = %d (%s)\n",
4465 indent, "",
4466 ptr->tune, processor_target_table[ptr->tune].name);
4468 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4470 if (target_string)
4472 fprintf (file, "%*s%s\n", indent, "", target_string);
4473 free (target_string);
4478 /* Inner function to process the attribute((target(...))), take an argument and
4479 set the current options from the argument. If we have a list, recursively go
4480 over the list. */
4482 static bool
4483 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4484 struct gcc_options *opts,
4485 struct gcc_options *opts_set,
4486 struct gcc_options *enum_opts_set)
4488 char *next_optstr;
4489 bool ret = true;
4491 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4492 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4493 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4494 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4495 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4497 enum ix86_opt_type
4499 ix86_opt_unknown,
4500 ix86_opt_yes,
4501 ix86_opt_no,
4502 ix86_opt_str,
4503 ix86_opt_enum,
4504 ix86_opt_isa
4507 static const struct
4509 const char *string;
4510 size_t len;
4511 enum ix86_opt_type type;
4512 int opt;
4513 int mask;
4514 } attrs[] = {
4515 /* isa options */
4516 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4517 IX86_ATTR_ISA ("abm", OPT_mabm),
4518 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4519 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4520 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4521 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4522 IX86_ATTR_ISA ("aes", OPT_maes),
4523 IX86_ATTR_ISA ("sha", OPT_msha),
4524 IX86_ATTR_ISA ("avx", OPT_mavx),
4525 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4526 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4527 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4528 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4529 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4530 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4531 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4532 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4533 IX86_ATTR_ISA ("sse", OPT_msse),
4534 IX86_ATTR_ISA ("sse2", OPT_msse2),
4535 IX86_ATTR_ISA ("sse3", OPT_msse3),
4536 IX86_ATTR_ISA ("sse4", OPT_msse4),
4537 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4538 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4539 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4540 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4541 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4542 IX86_ATTR_ISA ("fma", OPT_mfma),
4543 IX86_ATTR_ISA ("xop", OPT_mxop),
4544 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4545 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4546 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4547 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4548 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4549 IX86_ATTR_ISA ("hle", OPT_mhle),
4550 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4551 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4552 IX86_ATTR_ISA ("adx", OPT_madx),
4553 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4554 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4555 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4556 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4558 /* enum options */
4559 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4561 /* string options */
4562 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4563 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4565 /* flag options */
4566 IX86_ATTR_YES ("cld",
4567 OPT_mcld,
4568 MASK_CLD),
4570 IX86_ATTR_NO ("fancy-math-387",
4571 OPT_mfancy_math_387,
4572 MASK_NO_FANCY_MATH_387),
4574 IX86_ATTR_YES ("ieee-fp",
4575 OPT_mieee_fp,
4576 MASK_IEEE_FP),
4578 IX86_ATTR_YES ("inline-all-stringops",
4579 OPT_minline_all_stringops,
4580 MASK_INLINE_ALL_STRINGOPS),
4582 IX86_ATTR_YES ("inline-stringops-dynamically",
4583 OPT_minline_stringops_dynamically,
4584 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4586 IX86_ATTR_NO ("align-stringops",
4587 OPT_mno_align_stringops,
4588 MASK_NO_ALIGN_STRINGOPS),
4590 IX86_ATTR_YES ("recip",
4591 OPT_mrecip,
4592 MASK_RECIP),
4596 /* If this is a list, recurse to get the options. */
4597 if (TREE_CODE (args) == TREE_LIST)
4599 bool ret = true;
4601 for (; args; args = TREE_CHAIN (args))
4602 if (TREE_VALUE (args)
4603 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4604 p_strings, opts, opts_set,
4605 enum_opts_set))
4606 ret = false;
4608 return ret;
4611 else if (TREE_CODE (args) != STRING_CST)
4613 error ("attribute %<target%> argument not a string");
4614 return false;
4617 /* Handle multiple arguments separated by commas. */
4618 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4620 while (next_optstr && *next_optstr != '\0')
4622 char *p = next_optstr;
4623 char *orig_p = p;
4624 char *comma = strchr (next_optstr, ',');
4625 const char *opt_string;
4626 size_t len, opt_len;
4627 int opt;
4628 bool opt_set_p;
4629 char ch;
4630 unsigned i;
4631 enum ix86_opt_type type = ix86_opt_unknown;
4632 int mask = 0;
4634 if (comma)
4636 *comma = '\0';
4637 len = comma - next_optstr;
4638 next_optstr = comma + 1;
4640 else
4642 len = strlen (p);
4643 next_optstr = NULL;
4646 /* Recognize no-xxx. */
4647 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4649 opt_set_p = false;
4650 p += 3;
4651 len -= 3;
4653 else
4654 opt_set_p = true;
4656 /* Find the option. */
4657 ch = *p;
4658 opt = N_OPTS;
4659 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4661 type = attrs[i].type;
4662 opt_len = attrs[i].len;
4663 if (ch == attrs[i].string[0]
4664 && ((type != ix86_opt_str && type != ix86_opt_enum)
4665 ? len == opt_len
4666 : len > opt_len)
4667 && memcmp (p, attrs[i].string, opt_len) == 0)
4669 opt = attrs[i].opt;
4670 mask = attrs[i].mask;
4671 opt_string = attrs[i].string;
4672 break;
4676 /* Process the option. */
4677 if (opt == N_OPTS)
4679 error ("attribute(target(\"%s\")) is unknown", orig_p);
4680 ret = false;
4683 else if (type == ix86_opt_isa)
4685 struct cl_decoded_option decoded;
4687 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4688 ix86_handle_option (opts, opts_set,
4689 &decoded, input_location);
4692 else if (type == ix86_opt_yes || type == ix86_opt_no)
4694 if (type == ix86_opt_no)
4695 opt_set_p = !opt_set_p;
4697 if (opt_set_p)
4698 opts->x_target_flags |= mask;
4699 else
4700 opts->x_target_flags &= ~mask;
4703 else if (type == ix86_opt_str)
4705 if (p_strings[opt])
4707 error ("option(\"%s\") was already specified", opt_string);
4708 ret = false;
4710 else
4711 p_strings[opt] = xstrdup (p + opt_len);
4714 else if (type == ix86_opt_enum)
4716 bool arg_ok;
4717 int value;
4719 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4720 if (arg_ok)
4721 set_option (opts, enum_opts_set, opt, value,
4722 p + opt_len, DK_UNSPECIFIED, input_location,
4723 global_dc);
4724 else
4726 error ("attribute(target(\"%s\")) is unknown", orig_p);
4727 ret = false;
4731 else
4732 gcc_unreachable ();
4735 return ret;
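/* Editorial sketch (not part of the original source): an illustration of
   the strings the parser above accepts.  Each comma-separated token is
   matched against the attrs[] table; a "no-" prefix clears an ISA flag,
   "fpmath=" is handled as an enum option, and "arch="/"tune=" are string
   options whose values are saved in p_strings[] for the caller.  */
#if 0
__attribute__((target ("avx2,no-sse4a,fpmath=sse,arch=core-avx2")))
extern int example_fn (int);
#endif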
4738 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4740 tree
4741 ix86_valid_target_attribute_tree (tree args,
4742 struct gcc_options *opts,
4743 struct gcc_options *opts_set)
4745 const char *orig_arch_string = opts->x_ix86_arch_string;
4746 const char *orig_tune_string = opts->x_ix86_tune_string;
4747 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4748 int orig_tune_defaulted = ix86_tune_defaulted;
4749 int orig_arch_specified = ix86_arch_specified;
4750 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4751 tree t = NULL_TREE;
4752 int i;
4753 struct cl_target_option *def
4754 = TREE_TARGET_OPTION (target_option_default_node);
4755 struct gcc_options enum_opts_set;
4757 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4759 /* Process each of the options on the chain. */
4760 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4761 opts_set, &enum_opts_set))
4762 return error_mark_node;
4764 /* If the changed options are different from the default, rerun
4765 ix86_option_override_internal, and then save the options away.
4766 The string options are attribute options, and will be undone
4767 when we copy the save structure. */
4768 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4769 || opts->x_target_flags != def->x_target_flags
4770 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4771 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4772 || enum_opts_set.x_ix86_fpmath)
4774 /* If we are using the default tune= or arch=, undo the string assigned,
4775 and use the default. */
4776 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4777 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4778 else if (!orig_arch_specified)
4779 opts->x_ix86_arch_string = NULL;
4781 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4782 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4783 else if (orig_tune_defaulted)
4784 opts->x_ix86_tune_string = NULL;
4786 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4787 if (enum_opts_set.x_ix86_fpmath)
4788 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4789 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4790 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4792 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4793 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4796 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4797 ix86_option_override_internal (false, opts, opts_set);
4799 /* Add any builtin functions with the new isa if any. */
4800 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4802 /* Save the current options unless we are validating options for
4803 #pragma. */
4804 t = build_target_option_node (opts);
4806 opts->x_ix86_arch_string = orig_arch_string;
4807 opts->x_ix86_tune_string = orig_tune_string;
4808 opts_set->x_ix86_fpmath = orig_fpmath_set;
4810 /* Free up memory allocated to hold the strings */
4811 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4812 free (option_strings[i]);
4815 return t;
4818 /* Hook to validate attribute((target("string"))). */
4820 static bool
4821 ix86_valid_target_attribute_p (tree fndecl,
4822 tree ARG_UNUSED (name),
4823 tree args,
4824 int ARG_UNUSED (flags))
4826 struct gcc_options func_options;
4827 tree new_target, new_optimize;
4828 bool ret = true;
4830 /* attribute((target("default"))) does nothing, beyond
4831 affecting multi-versioning. */
4832 if (TREE_VALUE (args)
4833 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4834 && TREE_CHAIN (args) == NULL_TREE
4835 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4836 return true;
4838 tree old_optimize = build_optimization_node (&global_options);
4840 /* Get the optimization options of the current function. */
4841 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4843 if (!func_optimize)
4844 func_optimize = old_optimize;
4846 /* Init func_options. */
4847 memset (&func_options, 0, sizeof (func_options));
4848 init_options_struct (&func_options, NULL);
4849 lang_hooks.init_options_struct (&func_options);
4851 cl_optimization_restore (&func_options,
4852 TREE_OPTIMIZATION (func_optimize));
4854 /* Initialize func_options to the default before its target options can
4855 be set. */
4856 cl_target_option_restore (&func_options,
4857 TREE_TARGET_OPTION (target_option_default_node));
4859 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4860 &global_options_set);
4862 new_optimize = build_optimization_node (&func_options);
4864 if (new_target == error_mark_node)
4865 ret = false;
4867 else if (fndecl && new_target)
4869 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4871 if (old_optimize != new_optimize)
4872 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4875 return ret;
4879 /* Hook to determine if one function can safely inline another. */
4881 static bool
4882 ix86_can_inline_p (tree caller, tree callee)
4884 bool ret = false;
4885 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4886 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4888 /* If callee has no option attributes, then it is ok to inline. */
4889 if (!callee_tree)
4890 ret = true;
5892 /* If the caller has no option attributes but the callee does, then it is not ok to
4893 inline. */
4894 else if (!caller_tree)
4895 ret = false;
4897 else
4899 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4900 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5902 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
5903 can inline an SSE2 function but an SSE2 function can't inline an SSE4
5904 function. */
4905 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4906 != callee_opts->x_ix86_isa_flags)
4907 ret = false;
4909 /* See if we have the same non-isa options. */
4910 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4911 ret = false;
4913 /* See if arch, tune, etc. are the same. */
4914 else if (caller_opts->arch != callee_opts->arch)
4915 ret = false;
4917 else if (caller_opts->tune != callee_opts->tune)
4918 ret = false;
4920 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4921 ret = false;
4923 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4924 ret = false;
4926 else
4927 ret = true;
4930 return ret;
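/* Editorial sketch (not part of the original source): under the subset
   rule above, a caller using target("avx2") may inline a callee using
   target("sse2"), since -mavx2 implies the SSE2 ISA bit; the reverse
   direction is rejected, as is any mismatch in arch, tune, fpmath or
   branch cost.  */
#if 0
__attribute__((target ("sse2"))) static inline int callee_sse2 (int x) { return x + 1; }
__attribute__((target ("avx2"))) int caller_avx2 (int x) { return callee_sse2 (x); }
#endif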
4934 /* Remember the last target of ix86_set_current_function. */
4935 static GTY(()) tree ix86_previous_fndecl;
4937 /* Invalidate ix86_previous_fndecl cache. */
4938 void
4939 ix86_reset_previous_fndecl (void)
4941 ix86_previous_fndecl = NULL_TREE;
4944 /* Establish appropriate back-end context for processing the function
4945 FNDECL. The argument might be NULL to indicate processing at top
4946 level, outside of any function scope. */
4947 static void
4948 ix86_set_current_function (tree fndecl)
4950 /* Only change the context if the function changes. This hook is called
4951 several times in the course of compiling a function, and we don't want to
4952 slow things down too much or call target_reinit when it isn't safe. */
4953 if (fndecl && fndecl != ix86_previous_fndecl)
4955 tree old_tree = (ix86_previous_fndecl
4956 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4957 : NULL_TREE);
4959 tree new_tree = (fndecl
4960 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4961 : NULL_TREE);
4963 ix86_previous_fndecl = fndecl;
4964 if (old_tree == new_tree)
4967 else if (new_tree)
4969 cl_target_option_restore (&global_options,
4970 TREE_TARGET_OPTION (new_tree));
4971 if (TREE_TARGET_GLOBALS (new_tree))
4972 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4973 else
4974 TREE_TARGET_GLOBALS (new_tree)
4975 = save_target_globals_default_opts ();
4978 else if (old_tree)
4980 new_tree = target_option_current_node;
4981 cl_target_option_restore (&global_options,
4982 TREE_TARGET_OPTION (new_tree));
4983 if (TREE_TARGET_GLOBALS (new_tree))
4984 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4985 else if (new_tree == target_option_default_node)
4986 restore_target_globals (&default_target_globals);
4987 else
4988 TREE_TARGET_GLOBALS (new_tree)
4989 = save_target_globals_default_opts ();
4995 /* Return true if this goes in large data/bss. */
4997 static bool
4998 ix86_in_large_data_p (tree exp)
5000 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5001 return false;
5003 /* Functions are never large data. */
5004 if (TREE_CODE (exp) == FUNCTION_DECL)
5005 return false;
5007 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5009 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
5010 if (strcmp (section, ".ldata") == 0
5011 || strcmp (section, ".lbss") == 0)
5012 return true;
5013 return false;
5015 else
5017 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5019 /* If this is an incomplete type with size 0, then we can't put it
5020 in data because it might be too big when completed. */
5021 if (!size || size > ix86_section_threshold)
5022 return true;
5025 return false;
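/* Editorial sketch (not part of the original source): with
   -mcmodel=medium and the default -mlarge-data-threshold of 64K, an
   object bigger than the threshold is treated as large data by the
   function above and is placed in .ldata/.lbss, while smaller objects
   stay in the ordinary .data/.bss sections.  */
#if 0
static char big_buffer[1 << 20];   /* above the threshold: .lbss */
static int small_counter;          /* below the threshold: .bss  */
#endif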
5028 /* Switch to the appropriate section for output of DECL.
5029 DECL is either a `VAR_DECL' node or a constant of some sort.
5030 RELOC indicates whether forming the initial value of DECL requires
5031 link-time relocations. */
5033 ATTRIBUTE_UNUSED static section *
5034 x86_64_elf_select_section (tree decl, int reloc,
5035 unsigned HOST_WIDE_INT align)
5037 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5038 && ix86_in_large_data_p (decl))
5040 const char *sname = NULL;
5041 unsigned int flags = SECTION_WRITE;
5042 switch (categorize_decl_for_section (decl, reloc))
5044 case SECCAT_DATA:
5045 sname = ".ldata";
5046 break;
5047 case SECCAT_DATA_REL:
5048 sname = ".ldata.rel";
5049 break;
5050 case SECCAT_DATA_REL_LOCAL:
5051 sname = ".ldata.rel.local";
5052 break;
5053 case SECCAT_DATA_REL_RO:
5054 sname = ".ldata.rel.ro";
5055 break;
5056 case SECCAT_DATA_REL_RO_LOCAL:
5057 sname = ".ldata.rel.ro.local";
5058 break;
5059 case SECCAT_BSS:
5060 sname = ".lbss";
5061 flags |= SECTION_BSS;
5062 break;
5063 case SECCAT_RODATA:
5064 case SECCAT_RODATA_MERGE_STR:
5065 case SECCAT_RODATA_MERGE_STR_INIT:
5066 case SECCAT_RODATA_MERGE_CONST:
5067 sname = ".lrodata";
5068 flags = 0;
5069 break;
5070 case SECCAT_SRODATA:
5071 case SECCAT_SDATA:
5072 case SECCAT_SBSS:
5073 gcc_unreachable ();
5074 case SECCAT_TEXT:
5075 case SECCAT_TDATA:
5076 case SECCAT_TBSS:
5077 /* We don't split these for the medium model. Place them into
5078 default sections and hope for the best. */
5079 break;
5081 if (sname)
5083 /* We might get called with string constants, but get_named_section
5084 doesn't like them as they are not DECLs. Also, we need to set
5085 flags in that case. */
5086 if (!DECL_P (decl))
5087 return get_section (sname, flags, NULL);
5088 return get_named_section (decl, sname, reloc);
5091 return default_elf_select_section (decl, reloc, align);
5094 /* Select a set of attributes for section NAME based on the properties
5095 of DECL and whether or not RELOC indicates that DECL's initializer
5096 might contain runtime relocations. */
5098 static unsigned int ATTRIBUTE_UNUSED
5099 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5101 unsigned int flags = default_section_type_flags (decl, name, reloc);
5103 if (decl == NULL_TREE
5104 && (strcmp (name, ".ldata.rel.ro") == 0
5105 || strcmp (name, ".ldata.rel.ro.local") == 0))
5106 flags |= SECTION_RELRO;
5108 if (strcmp (name, ".lbss") == 0
5109 || strncmp (name, ".lbss.", 5) == 0
5110 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5111 flags |= SECTION_BSS;
5113 return flags;
5116 /* Build up a unique section name, expressed as a
5117 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5118 RELOC indicates whether the initial value of EXP requires
5119 link-time relocations. */
5121 static void ATTRIBUTE_UNUSED
5122 x86_64_elf_unique_section (tree decl, int reloc)
5124 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5125 && ix86_in_large_data_p (decl))
5127 const char *prefix = NULL;
5128 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5129 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5131 switch (categorize_decl_for_section (decl, reloc))
5133 case SECCAT_DATA:
5134 case SECCAT_DATA_REL:
5135 case SECCAT_DATA_REL_LOCAL:
5136 case SECCAT_DATA_REL_RO:
5137 case SECCAT_DATA_REL_RO_LOCAL:
5138 prefix = one_only ? ".ld" : ".ldata";
5139 break;
5140 case SECCAT_BSS:
5141 prefix = one_only ? ".lb" : ".lbss";
5142 break;
5143 case SECCAT_RODATA:
5144 case SECCAT_RODATA_MERGE_STR:
5145 case SECCAT_RODATA_MERGE_STR_INIT:
5146 case SECCAT_RODATA_MERGE_CONST:
5147 prefix = one_only ? ".lr" : ".lrodata";
5148 break;
5149 case SECCAT_SRODATA:
5150 case SECCAT_SDATA:
5151 case SECCAT_SBSS:
5152 gcc_unreachable ();
5153 case SECCAT_TEXT:
5154 case SECCAT_TDATA:
5155 case SECCAT_TBSS:
5156 /* We don't split these for the medium model. Place them into
5157 default sections and hope for the best. */
5158 break;
5160 if (prefix)
5162 const char *name, *linkonce;
5163 char *string;
5165 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5166 name = targetm.strip_name_encoding (name);
5168 /* If we're using one_only, then there needs to be a .gnu.linkonce
5169 prefix to the section name. */
5170 linkonce = one_only ? ".gnu.linkonce" : "";
5172 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5174 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5175 return;
5178 default_unique_section (decl, reloc);
5181 #ifdef COMMON_ASM_OP
5182 /* This says how to output assembler code to declare an
5183 uninitialized external linkage data object.
5185 For medium model x86-64 we need to use the .largecomm directive for
5186 large objects. */
5187 void
5188 x86_elf_aligned_common (FILE *file,
5189 const char *name, unsigned HOST_WIDE_INT size,
5190 int align)
5192 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5193 && size > (unsigned int)ix86_section_threshold)
5194 fputs (".largecomm\t", file);
5195 else
5196 fputs (COMMON_ASM_OP, file);
5197 assemble_name (file, name);
5198 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5199 size, align / BITS_PER_UNIT);
5201 #endif
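/* Editorial sketch (not part of the original source): for a medium-model
   common symbol above the large-data threshold, the function above emits
   the .largecomm pseudo-op instead of .comm, e.g.

       .largecomm big_common,1048576,32

   where the last two fields are the size in bytes and the alignment in
   bytes, exactly as printed by the fprintf call above.  */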
5203 /* Utility function for targets to use in implementing
5204 ASM_OUTPUT_ALIGNED_BSS. */
5206 void
5207 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5208 const char *name, unsigned HOST_WIDE_INT size,
5209 int align)
5211 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5212 && size > (unsigned int)ix86_section_threshold)
5213 switch_to_section (get_named_section (decl, ".lbss", 0));
5214 else
5215 switch_to_section (bss_section);
5216 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5217 #ifdef ASM_DECLARE_OBJECT_NAME
5218 last_assemble_variable_decl = decl;
5219 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5220 #else
5221 /* The standard thing is just to output a label for the object. */
5222 ASM_OUTPUT_LABEL (file, name);
5223 #endif /* ASM_DECLARE_OBJECT_NAME */
5224 ASM_OUTPUT_SKIP (file, size ? size : 1);
5227 /* Decide whether we must probe the stack before any space allocation
5228 on this target. It's essentially TARGET_STACK_PROBE except when
5229 -fstack-check causes the stack to be already probed differently. */
5231 bool
5232 ix86_target_stack_probe (void)
5234 /* Do not probe the stack twice if static stack checking is enabled. */
5235 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5236 return false;
5238 return TARGET_STACK_PROBE;
5241 /* Decide whether we can make a sibling call to a function. DECL is the
5242 declaration of the function being targeted by the call and EXP is the
5243 CALL_EXPR representing the call. */
5245 static bool
5246 ix86_function_ok_for_sibcall (tree decl, tree exp)
5248 tree type, decl_or_type;
5249 rtx a, b;
5251 /* If we are generating position-independent code, we cannot sibcall
5252 optimize any indirect call, or a direct call to a global function,
5253 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5254 if (!TARGET_MACHO
5255 && !TARGET_64BIT
5256 && flag_pic
5257 && (!decl || !targetm.binds_local_p (decl)))
5258 return false;
5260 /* If we need to align the outgoing stack, then sibcalling would
5261 unalign the stack, which may break the called function. */
5262 if (ix86_minimum_incoming_stack_boundary (true)
5263 < PREFERRED_STACK_BOUNDARY)
5264 return false;
5266 if (decl)
5268 decl_or_type = decl;
5269 type = TREE_TYPE (decl);
5271 else
5273 /* We're looking at the CALL_EXPR, we need the type of the function. */
5274 type = CALL_EXPR_FN (exp); /* pointer expression */
5275 type = TREE_TYPE (type); /* pointer type */
5276 type = TREE_TYPE (type); /* function type */
5277 decl_or_type = type;
5280 /* Check that the return value locations are the same. Like
5281 if we are returning floats on the 80387 register stack, we cannot
5282 make a sibcall from a function that doesn't return a float to a
5283 function that does or, conversely, from a function that does return
5284 a float to a function that doesn't; the necessary stack adjustment
5285 would not be executed. This is also the place we notice
5286 differences in the return value ABI. Note that it is ok for one
5287 of the functions to have void return type as long as the return
5288 value of the other is passed in a register. */
5289 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5290 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5291 cfun->decl, false);
5292 if (STACK_REG_P (a) || STACK_REG_P (b))
5294 if (!rtx_equal_p (a, b))
5295 return false;
5297 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5299 else if (!rtx_equal_p (a, b))
5300 return false;
5302 if (TARGET_64BIT)
5304 /* The SYSV ABI has more call-clobbered registers;
5305 disallow sibcalls from MS to SYSV. */
5306 if (cfun->machine->call_abi == MS_ABI
5307 && ix86_function_type_abi (type) == SYSV_ABI)
5308 return false;
5310 else
5312 /* If this call is indirect, we'll need to be able to use a
5313 call-clobbered register for the address of the target function.
5314 Make sure that all such registers are not used for passing
5315 parameters. Note that DLLIMPORT functions are indirect. */
5316 if (!decl
5317 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5319 if (ix86_function_regparm (type, NULL) >= 3)
5321 /* ??? Need to count the actual number of registers to be used,
5322 not the possible number of registers. Fix later. */
5323 return false;
5328 /* Otherwise okay. That also includes certain types of indirect calls. */
5329 return true;
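/* Editorial sketch (not part of the original source): the 32-bit PIC
   check above means that with -m32 -fpic a tail call to a global
   function is not turned into a sibcall (the call goes through the PLT,
   which needs %ebx live), whereas a call to a file-local function may
   still become a plain "jmp".  */
#if 0
static int local_fn (int x) { return x * 2; }
extern int global_fn (int x);
int tail_local (int x) { return local_fn (x); }   /* may become jmp local_fn       */
int tail_global (int x) { return global_fn (x); } /* stays a call under -m32 -fpic */
#endif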
5332 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5333 and "sseregparm" calling convention attributes;
5334 arguments as in struct attribute_spec.handler. */
5336 static tree
5337 ix86_handle_cconv_attribute (tree *node, tree name,
5338 tree args,
5339 int flags ATTRIBUTE_UNUSED,
5340 bool *no_add_attrs)
5342 if (TREE_CODE (*node) != FUNCTION_TYPE
5343 && TREE_CODE (*node) != METHOD_TYPE
5344 && TREE_CODE (*node) != FIELD_DECL
5345 && TREE_CODE (*node) != TYPE_DECL)
5347 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5348 name);
5349 *no_add_attrs = true;
5350 return NULL_TREE;
5353 /* Can combine regparm with all attributes but fastcall and thiscall. */
5354 if (is_attribute_p ("regparm", name))
5356 tree cst;
5358 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5360 error ("fastcall and regparm attributes are not compatible");
5363 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5365 error ("regparm and thiscall attributes are not compatible");
5368 cst = TREE_VALUE (args);
5369 if (TREE_CODE (cst) != INTEGER_CST)
5371 warning (OPT_Wattributes,
5372 "%qE attribute requires an integer constant argument",
5373 name);
5374 *no_add_attrs = true;
5376 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5378 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5379 name, REGPARM_MAX);
5380 *no_add_attrs = true;
5383 return NULL_TREE;
5386 if (TARGET_64BIT)
5388 /* Do not warn when emulating the MS ABI. */
5389 if ((TREE_CODE (*node) != FUNCTION_TYPE
5390 && TREE_CODE (*node) != METHOD_TYPE)
5391 || ix86_function_type_abi (*node) != MS_ABI)
5392 warning (OPT_Wattributes, "%qE attribute ignored",
5393 name);
5394 *no_add_attrs = true;
5395 return NULL_TREE;
5398 /* Can combine fastcall only with sseregparm. */
5399 if (is_attribute_p ("fastcall", name))
5401 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5403 error ("fastcall and cdecl attributes are not compatible");
5405 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5407 error ("fastcall and stdcall attributes are not compatible");
5409 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5411 error ("fastcall and regparm attributes are not compatible");
5413 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5415 error ("fastcall and thiscall attributes are not compatible");
5419 /* Can combine stdcall with regparm and
5420 sseregparm. */
5421 else if (is_attribute_p ("stdcall", name))
5423 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5425 error ("stdcall and cdecl attributes are not compatible");
5427 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5429 error ("stdcall and fastcall attributes are not compatible");
5431 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5433 error ("stdcall and thiscall attributes are not compatible");
5437 /* Can combine cdecl with regparm and sseregparm. */
5438 else if (is_attribute_p ("cdecl", name))
5440 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5442 error ("stdcall and cdecl attributes are not compatible");
5444 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5446 error ("fastcall and cdecl attributes are not compatible");
5448 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5450 error ("cdecl and thiscall attributes are not compatible");
5453 else if (is_attribute_p ("thiscall", name))
5455 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5456 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5457 name);
5458 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5460 error ("stdcall and thiscall attributes are not compatible");
5462 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5464 error ("fastcall and thiscall attributes are not compatible");
5466 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5468 error ("cdecl and thiscall attributes are not compatible");
5472 /* Can combine sseregparm with all attributes. */
5474 return NULL_TREE;
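/* Editorial sketch (not part of the original source, assuming a 32-bit
   target): examples of the compatibility rules enforced above.  regparm
   combines with stdcall or cdecl but not with fastcall or thiscall, and
   the base conventions are mutually exclusive.  */
#if 0
void ok_a (int, int) __attribute__((stdcall, regparm (2)));  /* accepted            */
void ok_b (int)      __attribute__((cdecl, sseregparm));     /* accepted            */
void bad_a (int)     __attribute__((fastcall, regparm (3))); /* error: incompatible */
void bad_b (int)     __attribute__((stdcall, cdecl));        /* error: incompatible */
#endif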
5477 /* The transactional memory builtins are implicitly regparm or fastcall
5478 depending on the ABI. Override the generic do-nothing attribute that
5479 these builtins were declared with, and replace it with one of the two
5480 attributes that we expect elsewhere. */
5482 static tree
5483 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5484 tree args ATTRIBUTE_UNUSED,
5485 int flags, bool *no_add_attrs)
5487 tree alt;
5489 /* In no case do we want to add the placeholder attribute. */
5490 *no_add_attrs = true;
5492 /* The 64-bit ABI is unchanged for transactional memory. */
5493 if (TARGET_64BIT)
5494 return NULL_TREE;
5496 /* ??? Is there a better way to validate 32-bit windows? We have
5497 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5498 if (CHECK_STACK_LIMIT > 0)
5499 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5500 else
5502 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5503 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5505 decl_attributes (node, alt, flags);
5507 return NULL_TREE;
5510 /* This function determines from TYPE the calling-convention. */
5512 unsigned int
5513 ix86_get_callcvt (const_tree type)
5515 unsigned int ret = 0;
5516 bool is_stdarg;
5517 tree attrs;
5519 if (TARGET_64BIT)
5520 return IX86_CALLCVT_CDECL;
5522 attrs = TYPE_ATTRIBUTES (type);
5523 if (attrs != NULL_TREE)
5525 if (lookup_attribute ("cdecl", attrs))
5526 ret |= IX86_CALLCVT_CDECL;
5527 else if (lookup_attribute ("stdcall", attrs))
5528 ret |= IX86_CALLCVT_STDCALL;
5529 else if (lookup_attribute ("fastcall", attrs))
5530 ret |= IX86_CALLCVT_FASTCALL;
5531 else if (lookup_attribute ("thiscall", attrs))
5532 ret |= IX86_CALLCVT_THISCALL;
5534 /* Regparm isn't allowed with thiscall or fastcall. */
5535 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5537 if (lookup_attribute ("regparm", attrs))
5538 ret |= IX86_CALLCVT_REGPARM;
5539 if (lookup_attribute ("sseregparm", attrs))
5540 ret |= IX86_CALLCVT_SSEREGPARM;
5543 if (IX86_BASE_CALLCVT (ret) != 0)
5544 return ret;
5547 is_stdarg = stdarg_p (type);
5548 if (TARGET_RTD && !is_stdarg)
5549 return IX86_CALLCVT_STDCALL | ret;
5551 if (ret != 0
5552 || is_stdarg
5553 || TREE_CODE (type) != METHOD_TYPE
5554 || ix86_function_type_abi (type) != MS_ABI)
5555 return IX86_CALLCVT_CDECL | ret;
5557 return IX86_CALLCVT_THISCALL;
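/* Editorial sketch (not part of the original source): on a 32-bit
   target, a prototyped non-variadic function type with no attributes
   yields IX86_CALLCVT_CDECL; the same type compiled with -mrtd yields
   IX86_CALLCVT_STDCALL; and a type carrying __attribute__((regparm(3)))
   yields IX86_CALLCVT_CDECL | IX86_CALLCVT_REGPARM.  */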
5560 /* Return 0 if the attributes for two types are incompatible, 1 if they
5561 are compatible, and 2 if they are nearly compatible (which causes a
5562 warning to be generated). */
5564 static int
5565 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5567 unsigned int ccvt1, ccvt2;
5569 if (TREE_CODE (type1) != FUNCTION_TYPE
5570 && TREE_CODE (type1) != METHOD_TYPE)
5571 return 1;
5573 ccvt1 = ix86_get_callcvt (type1);
5574 ccvt2 = ix86_get_callcvt (type2);
5575 if (ccvt1 != ccvt2)
5576 return 0;
5577 if (ix86_function_regparm (type1, NULL)
5578 != ix86_function_regparm (type2, NULL))
5579 return 0;
5581 return 1;
5584 /* Return the regparm value for a function with the indicated TYPE and DECL.
5585 DECL may be NULL when calling function indirectly
5586 or considering a libcall. */
5588 static int
5589 ix86_function_regparm (const_tree type, const_tree decl)
5591 tree attr;
5592 int regparm;
5593 unsigned int ccvt;
5595 if (TARGET_64BIT)
5596 return (ix86_function_type_abi (type) == SYSV_ABI
5597 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5598 ccvt = ix86_get_callcvt (type);
5599 regparm = ix86_regparm;
5601 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5603 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5604 if (attr)
5606 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5607 return regparm;
5610 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5611 return 2;
5612 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5613 return 1;
5615 /* Use register calling convention for local functions when possible. */
5616 if (decl
5617 && TREE_CODE (decl) == FUNCTION_DECL
5618 /* Caller and callee must agree on the calling convention, so
5619 checking just the optimize flag here would mean that with
5620 __attribute__((optimize (...))) the caller could use the regparm convention
5621 and the callee not, or vice versa. Instead look at whether the callee
5622 is optimized or not. */
5623 && opt_for_fn (decl, optimize)
5624 && !(profile_flag && !flag_fentry))
5626 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5627 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5628 if (i && i->local && i->can_change_signature)
5630 int local_regparm, globals = 0, regno;
5632 /* Make sure no regparm register is taken by a
5633 fixed register variable. */
5634 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5635 if (fixed_regs[local_regparm])
5636 break;
5638 /* We don't want to use regparm(3) for nested functions as
5639 these use a static chain pointer in the third argument. */
5640 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5641 local_regparm = 2;
5643 /* In 32-bit mode save a register for the split stack. */
5644 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5645 local_regparm = 2;
5647 /* Each fixed register usage increases register pressure,
5648 so fewer registers should be used for argument passing.
5649 This functionality can be overridden by an explicit
5650 regparm value. */
5651 for (regno = AX_REG; regno <= DI_REG; regno++)
5652 if (fixed_regs[regno])
5653 globals++;
5655 local_regparm
5656 = globals < local_regparm ? local_regparm - globals : 0;
5658 if (local_regparm > regparm)
5659 regparm = local_regparm;
5663 return regparm;
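/* Editorial sketch (not part of the original source): a file-local
   function such as
       static int add3 (int a, int b, int c) { return a + b + c; }
   compiled with -m32 -O2 can be promoted by the code above to an
   implicit regparm(3), taking its arguments in %eax, %edx and %ecx,
   unless a fixed register variable, a static chain, or -fsplit-stack
   claims one of those registers.  */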
5666 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5667 DFmode (2) arguments in SSE registers for a function with the
5668 indicated TYPE and DECL. DECL may be NULL when calling function
5669 indirectly or considering a libcall. Otherwise return 0. */
5671 static int
5672 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5674 gcc_assert (!TARGET_64BIT);
5676 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5677 by the sseregparm attribute. */
5678 if (TARGET_SSEREGPARM
5679 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5681 if (!TARGET_SSE)
5683 if (warn)
5685 if (decl)
5686 error ("calling %qD with attribute sseregparm without "
5687 "SSE/SSE2 enabled", decl);
5688 else
5689 error ("calling %qT with attribute sseregparm without "
5690 "SSE/SSE2 enabled", type);
5692 return 0;
5695 return 2;
5698 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5699 (and DFmode for SSE2) arguments in SSE registers. */
5700 if (decl && TARGET_SSE_MATH && optimize
5701 && !(profile_flag && !flag_fentry))
5703 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5704 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5705 if (i && i->local && i->can_change_signature)
5706 return TARGET_SSE2 ? 2 : 1;
5709 return 0;
5712 /* Return true if EAX is live at the start of the function. Used by
5713 ix86_expand_prologue to determine if we need special help before
5714 calling allocate_stack_worker. */
5716 static bool
5717 ix86_eax_live_at_start_p (void)
5719 /* Cheat. Don't bother working forward from ix86_function_regparm
5720 to the function type to whether an actual argument is located in
5721 eax. Instead just look at cfg info, which is still close enough
5722 to correct at this point. This gives false positives for broken
5723 functions that might use uninitialized data that happens to be
5724 allocated in eax, but who cares? */
5725 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5728 static bool
5729 ix86_keep_aggregate_return_pointer (tree fntype)
5731 tree attr;
5733 if (!TARGET_64BIT)
5735 attr = lookup_attribute ("callee_pop_aggregate_return",
5736 TYPE_ATTRIBUTES (fntype));
5737 if (attr)
5738 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5740 /* For 32-bit MS-ABI the default is to keep aggregate
5741 return pointer. */
5742 if (ix86_function_type_abi (fntype) == MS_ABI)
5743 return true;
5745 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5748 /* Value is the number of bytes of arguments automatically
5749 popped when returning from a subroutine call.
5750 FUNDECL is the declaration node of the function (as a tree),
5751 FUNTYPE is the data type of the function (as a tree),
5752 or for a library call it is an identifier node for the subroutine name.
5753 SIZE is the number of bytes of arguments passed on the stack.
5755 On the 80386, the RTD insn may be used to pop them if the number
5756 of args is fixed, but if the number is variable then the caller
5757 must pop them all. RTD can't be used for library calls now
5758 because the library is compiled with the Unix compiler.
5759 Use of RTD is a selectable option, since it is incompatible with
5760 standard Unix calling sequences. If the option is not selected,
5761 the caller must always pop the args.
5763 The attribute stdcall is equivalent to RTD on a per module basis. */
5765 static int
5766 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5768 unsigned int ccvt;
5770 /* None of the 64-bit ABIs pop arguments. */
5771 if (TARGET_64BIT)
5772 return 0;
5774 ccvt = ix86_get_callcvt (funtype);
5776 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5777 | IX86_CALLCVT_THISCALL)) != 0
5778 && ! stdarg_p (funtype))
5779 return size;
5781 /* Lose any fake structure return argument if it is passed on the stack. */
5782 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5783 && !ix86_keep_aggregate_return_pointer (funtype))
5785 int nregs = ix86_function_regparm (funtype, fundecl);
5786 if (nregs == 0)
5787 return GET_MODE_SIZE (Pmode);
5790 return 0;
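/* Editorial sketch (not part of the original source): for a 32-bit
   stdcall function taking two int arguments, the function above returns
   8, so the epilogue ends with "ret $8" and the callee pops its own
   stack arguments; for a cdecl function it returns 0 and the caller
   does the popping.  */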
5793 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5795 static bool
5796 ix86_legitimate_combined_insn (rtx insn)
5798 /* Check operand constraints in case hard registers were propagated
5799 into insn pattern. This check prevents combine pass from
5800 generating insn patterns with invalid hard register operands.
5801 These invalid insns can eventually confuse reload to error out
5802 with a spill failure. See also PRs 46829 and 46843. */
5803 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5805 int i;
5807 extract_insn (insn);
5808 preprocess_constraints ();
5810 for (i = 0; i < recog_data.n_operands; i++)
5812 rtx op = recog_data.operand[i];
5813 enum machine_mode mode = GET_MODE (op);
5814 struct operand_alternative *op_alt;
5815 int offset = 0;
5816 bool win;
5817 int j;
5819 /* For pre-AVX disallow unaligned loads/stores where the
5820 instructions don't support it. */
5821 if (!TARGET_AVX
5822 && VECTOR_MODE_P (GET_MODE (op))
5823 && misaligned_operand (op, GET_MODE (op)))
5825 int min_align = get_attr_ssememalign (insn);
5826 if (min_align == 0)
5827 return false;
5830 /* A unary operator may be accepted by the predicate, but it
5831 is irrelevant for matching constraints. */
5832 if (UNARY_P (op))
5833 op = XEXP (op, 0);
5835 if (GET_CODE (op) == SUBREG)
5837 if (REG_P (SUBREG_REG (op))
5838 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5839 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5840 GET_MODE (SUBREG_REG (op)),
5841 SUBREG_BYTE (op),
5842 GET_MODE (op));
5843 op = SUBREG_REG (op);
5846 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5847 continue;
5849 op_alt = recog_op_alt[i];
5851 /* Operand has no constraints, anything is OK. */
5852 win = !recog_data.n_alternatives;
5854 for (j = 0; j < recog_data.n_alternatives; j++)
5856 if (op_alt[j].anything_ok
5857 || (op_alt[j].matches != -1
5858 && operands_match_p
5859 (recog_data.operand[i],
5860 recog_data.operand[op_alt[j].matches]))
5861 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5863 win = true;
5864 break;
5868 if (!win)
5869 return false;
5873 return true;
5876 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5878 static unsigned HOST_WIDE_INT
5879 ix86_asan_shadow_offset (void)
5881 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5882 : HOST_WIDE_INT_C (0x7fff8000))
5883 : (HOST_WIDE_INT_1 << 29);
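/* Editorial sketch (not part of the original source): AddressSanitizer
   maps an application address to its shadow byte as
       shadow = (addr >> 3) + ix86_asan_shadow_offset ()
   so with the LP64 value 0x7fff8000 returned above, address 0x400000
   has its shadow byte at 0x7fff8000 + 0x80000 = 0x80078000.  */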
5886 /* Argument support functions. */
5888 /* Return true when register may be used to pass function parameters. */
5889 bool
5890 ix86_function_arg_regno_p (int regno)
5892 int i;
5893 const int *parm_regs;
5895 if (!TARGET_64BIT)
5897 if (TARGET_MACHO)
5898 return (regno < REGPARM_MAX
5899 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5900 else
5901 return (regno < REGPARM_MAX
5902 || (TARGET_MMX && MMX_REGNO_P (regno)
5903 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5904 || (TARGET_SSE && SSE_REGNO_P (regno)
5905 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5908 if (TARGET_SSE && SSE_REGNO_P (regno)
5909 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5910 return true;
5912 /* TODO: The function should depend on current function ABI but
5913 builtins.c would need updating then. Therefore we use the
5914 default ABI. */
5916 /* RAX is used as hidden argument to va_arg functions. */
5917 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5918 return true;
5920 if (ix86_abi == MS_ABI)
5921 parm_regs = x86_64_ms_abi_int_parameter_registers;
5922 else
5923 parm_regs = x86_64_int_parameter_registers;
5924 for (i = 0; i < (ix86_abi == MS_ABI
5925 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5926 if (regno == parm_regs[i])
5927 return true;
5928 return false;
5931 /* Return if we do not know how to pass TYPE solely in registers. */
5933 static bool
5934 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5936 if (must_pass_in_stack_var_size_or_pad (mode, type))
5937 return true;
5939 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5940 The layout_type routine is crafty and tries to trick us into passing
5941 currently unsupported vector types on the stack by using TImode. */
5942 return (!TARGET_64BIT && mode == TImode
5943 && type && TREE_CODE (type) != VECTOR_TYPE);
5946 /* Return the size, in bytes, of the area reserved for arguments passed
5947 in registers for the function represented by FNDECL, depending on the
5948 ABI it uses. */
5949 int
5950 ix86_reg_parm_stack_space (const_tree fndecl)
5952 enum calling_abi call_abi = SYSV_ABI;
5953 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5954 call_abi = ix86_function_abi (fndecl);
5955 else
5956 call_abi = ix86_function_type_abi (fndecl);
5957 if (TARGET_64BIT && call_abi == MS_ABI)
5958 return 32;
5959 return 0;
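/* Editorial sketch (not part of the original source): the 32 bytes
   returned above for the 64-bit MS ABI are the "shadow space" (home
   area) a caller must reserve on the stack for the four register
   parameters; the SysV ABI reserves no such area, hence 0.  */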
5962 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5963 call abi used. */
5964 enum calling_abi
5965 ix86_function_type_abi (const_tree fntype)
5967 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5969 enum calling_abi abi = ix86_abi;
5970 if (abi == SYSV_ABI)
5972 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5973 abi = MS_ABI;
5975 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5976 abi = SYSV_ABI;
5977 return abi;
5979 return ix86_abi;
5982 /* We add this as a workaround in order to use libc_has_function
5983 hook in i386.md. */
5984 bool
5985 ix86_libc_has_function (enum function_class fn_class)
5987 return targetm.libc_has_function (fn_class);
5990 static bool
5991 ix86_function_ms_hook_prologue (const_tree fn)
5993 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5995 if (decl_function_context (fn) != NULL_TREE)
5996 error_at (DECL_SOURCE_LOCATION (fn),
5997 "ms_hook_prologue is not compatible with nested function");
5998 else
5999 return true;
6001 return false;
6004 static enum calling_abi
6005 ix86_function_abi (const_tree fndecl)
6007 if (! fndecl)
6008 return ix86_abi;
6009 return ix86_function_type_abi (TREE_TYPE (fndecl));
6012 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
6013 call abi used. */
6014 enum calling_abi
6015 ix86_cfun_abi (void)
6017 if (! cfun)
6018 return ix86_abi;
6019 return cfun->machine->call_abi;
6022 /* Write the extra assembler code needed to declare a function properly. */
6024 void
6025 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6026 tree decl)
6028 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6030 if (is_ms_hook)
6032 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6033 unsigned int filler_cc = 0xcccccccc;
6035 for (i = 0; i < filler_count; i += 4)
6036 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6039 #ifdef SUBTARGET_ASM_UNWIND_INIT
6040 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6041 #endif
6043 ASM_OUTPUT_LABEL (asm_out_file, fname);
6045 /* Output magic byte marker, if hot-patch attribute is set. */
6046 if (is_ms_hook)
6048 if (TARGET_64BIT)
6050 /* leaq [%rsp + 0], %rsp */
6051 asm_fprintf (asm_out_file, ASM_BYTE
6052 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6054 else
6056 /* movl.s %edi, %edi
6057 push %ebp
6058 movl.s %esp, %ebp */
6059 asm_fprintf (asm_out_file, ASM_BYTE
6060 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6065 /* regclass.c */
6066 extern void init_regs (void);
6068 /* Implementation of call abi switching target hook. Specific to FNDECL
6069 the specific call register sets are set. See also
6070 ix86_conditional_register_usage for more details. */
6071 void
6072 ix86_call_abi_override (const_tree fndecl)
6074 if (fndecl == NULL_TREE)
6075 cfun->machine->call_abi = ix86_abi;
6076 else
6077 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6080 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6081 expensive re-initialization of init_regs each time we switch function context
6082 since this is needed only during RTL expansion. */
6083 static void
6084 ix86_maybe_switch_abi (void)
6086 if (TARGET_64BIT &&
6087 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6088 reinit_regs ();
6091 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6092 for a call to a function whose data type is FNTYPE.
6093 For a library call, FNTYPE is 0. */
6095 void
6096 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6097 tree fntype, /* tree ptr for function decl */
6098 rtx libname, /* SYMBOL_REF of library name or 0 */
6099 tree fndecl,
6100 int caller)
6102 struct cgraph_local_info *i;
6104 memset (cum, 0, sizeof (*cum));
6106 if (fndecl)
6108 i = cgraph_local_info (fndecl);
6109 cum->call_abi = ix86_function_abi (fndecl);
6111 else
6113 i = NULL;
6114 cum->call_abi = ix86_function_type_abi (fntype);
6117 cum->caller = caller;
6119 /* Set up the number of registers to use for passing arguments. */
6120 cum->nregs = ix86_regparm;
6121 if (TARGET_64BIT)
6123 cum->nregs = (cum->call_abi == SYSV_ABI
6124 ? X86_64_REGPARM_MAX
6125 : X86_64_MS_REGPARM_MAX);
6127 if (TARGET_SSE)
6129 cum->sse_nregs = SSE_REGPARM_MAX;
6130 if (TARGET_64BIT)
6132 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6133 ? X86_64_SSE_REGPARM_MAX
6134 : X86_64_MS_SSE_REGPARM_MAX);
6137 if (TARGET_MMX)
6138 cum->mmx_nregs = MMX_REGPARM_MAX;
6139 cum->warn_avx512f = true;
6140 cum->warn_avx = true;
6141 cum->warn_sse = true;
6142 cum->warn_mmx = true;
6144 /* Because the type might mismatch between caller and callee, we need to
6145 use the actual type of the function for local calls.
6146 FIXME: cgraph_analyze can be told to actually record whether a function uses
6147 va_start, so for local functions maybe_vaarg could be made more aggressive,
6148 helping K&R code.
6149 FIXME: once the type system is fixed, we won't need this code anymore. */
6150 if (i && i->local && i->can_change_signature)
6151 fntype = TREE_TYPE (fndecl);
6152 cum->maybe_vaarg = (fntype
6153 ? (!prototype_p (fntype) || stdarg_p (fntype))
6154 : !libname);
6156 if (!TARGET_64BIT)
6158 /* If there are variable arguments, then we won't pass anything
6159 in registers in 32-bit mode. */
6160 if (stdarg_p (fntype))
6162 cum->nregs = 0;
6163 cum->sse_nregs = 0;
6164 cum->mmx_nregs = 0;
6165 cum->warn_avx512f = false;
6166 cum->warn_avx = false;
6167 cum->warn_sse = false;
6168 cum->warn_mmx = false;
6169 return;
6172 /* Use ecx and edx registers if function has fastcall attribute,
6173 else look for regparm information. */
6174 if (fntype)
6176 unsigned int ccvt = ix86_get_callcvt (fntype);
6177 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6179 cum->nregs = 1;
6180 cum->fastcall = 1; /* Same first register as in fastcall. */
6182 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6184 cum->nregs = 2;
6185 cum->fastcall = 1;
6187 else
6188 cum->nregs = ix86_function_regparm (fntype, fndecl);
6191 /* Set up the number of SSE registers used for passing SFmode
6192 and DFmode arguments. Warn for mismatching ABI. */
6193 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6197 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6198 But in the case of vector types, it is some vector mode.
6200 When we have only some of our vector isa extensions enabled, then there
6201 are some modes for which vector_mode_supported_p is false. For these
6202 modes, the generic vector support in gcc will choose some non-vector mode
6203 in order to implement the type. By computing the natural mode, we'll
6204 select the proper ABI location for the operand and not depend on whatever
6205 the middle-end decides to do with these vector types.
6207 The middle-end can't deal with vector types > 16 bytes. In this
6208 case, we return the original mode and warn about the ABI change if CUM
6209 isn't NULL.
6211 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6212 available for the function return value. */
6214 static enum machine_mode
6215 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6216 bool in_return)
6218 enum machine_mode mode = TYPE_MODE (type);
6220 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6222 HOST_WIDE_INT size = int_size_in_bytes (type);
6223 if ((size == 8 || size == 16 || size == 32 || size == 64)
6224 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6225 && TYPE_VECTOR_SUBPARTS (type) > 1)
6227 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6229 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6230 mode = MIN_MODE_VECTOR_FLOAT;
6231 else
6232 mode = MIN_MODE_VECTOR_INT;
6234 /* Get the mode which has this inner mode and number of units. */
6235 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6236 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6237 && GET_MODE_INNER (mode) == innermode)
6239 if (size == 64 && !TARGET_AVX512F)
6241 static bool warnedavx512f;
6242 static bool warnedavx512f_ret;
6244 if (cum && cum->warn_avx512f && !warnedavx512f)
6246 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6247 "without AVX512F enabled changes the ABI"))
6248 warnedavx512f = true;
6250 else if (in_return && !warnedavx512f_ret)
6252 if (warning (OPT_Wpsabi, "AVX512F vector return "
6253 "without AVX512F enabled changes the ABI"))
6254 warnedavx512f_ret = true;
6257 return TYPE_MODE (type);
6259 else if (size == 32 && !TARGET_AVX)
6261 static bool warnedavx;
6262 static bool warnedavx_ret;
6264 if (cum && cum->warn_avx && !warnedavx)
6266 if (warning (OPT_Wpsabi, "AVX vector argument "
6267 "without AVX enabled changes the ABI"))
6268 warnedavx = true;
6270 else if (in_return && !warnedavx_ret)
6272 if (warning (OPT_Wpsabi, "AVX vector return "
6273 "without AVX enabled changes the ABI"))
6274 warnedavx_ret = true;
6277 return TYPE_MODE (type);
6279 else if (((size == 8 && TARGET_64BIT) || size == 16)
6280 && !TARGET_SSE)
6282 static bool warnedsse;
6283 static bool warnedsse_ret;
6285 if (cum && cum->warn_sse && !warnedsse)
6287 if (warning (OPT_Wpsabi, "SSE vector argument "
6288 "without SSE enabled changes the ABI"))
6289 warnedsse = true;
6291 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6293 if (warning (OPT_Wpsabi, "SSE vector return "
6294 "without SSE enabled changes the ABI"))
6295 warnedsse_ret = true;
6298 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6300 static bool warnedmmx;
6301 static bool warnedmmx_ret;
6303 if (cum && cum->warn_mmx && !warnedmmx)
6305 if (warning (OPT_Wpsabi, "MMX vector argument "
6306 "without MMX enabled changes the ABI"))
6307 warnedmmx = true;
6309 else if (in_return && !warnedmmx_ret)
6311 if (warning (OPT_Wpsabi, "MMX vector return "
6312 "without MMX enabled changes the ABI"))
6313 warnedmmx_ret = true;
6316 return mode;
6319 gcc_unreachable ();
6323 return mode;
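/* Editorial sketch (not part of the original source): passing a 256-bit
   vector, e.g.
       typedef double v4df __attribute__((vector_size (32)));
   to a function compiled without -mavx makes the code above fall back to
   the original TYPE_MODE and emit the -Wpsabi warning "AVX vector
   argument without AVX enabled changes the ABI" at most once per
   compilation.  */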
6326 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6327 this may not agree with the mode that the type system has chosen for the
6328 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6329 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6331 static rtx
6332 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6333 unsigned int regno)
6335 rtx tmp;
6337 if (orig_mode != BLKmode)
6338 tmp = gen_rtx_REG (orig_mode, regno);
6339 else
6341 tmp = gen_rtx_REG (mode, regno);
6342 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6343 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6346 return tmp;
6349 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
6350 goal of this code is to classify each 8-byte chunk of an incoming argument by
6351 register class and assign registers accordingly. */
6353 /* Return the union class of CLASS1 and CLASS2.
6354 See the x86-64 PS ABI for details. */
6356 static enum x86_64_reg_class
6357 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6359 /* Rule #1: If both classes are equal, this is the resulting class. */
6360 if (class1 == class2)
6361 return class1;
6363 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6364 the other class. */
6365 if (class1 == X86_64_NO_CLASS)
6366 return class2;
6367 if (class2 == X86_64_NO_CLASS)
6368 return class1;
6370 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6371 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6372 return X86_64_MEMORY_CLASS;
6374 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6375 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6376 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6377 return X86_64_INTEGERSI_CLASS;
6378 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6379 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6380 return X86_64_INTEGER_CLASS;
6382 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6383 MEMORY is used. */
6384 if (class1 == X86_64_X87_CLASS
6385 || class1 == X86_64_X87UP_CLASS
6386 || class1 == X86_64_COMPLEX_X87_CLASS
6387 || class2 == X86_64_X87_CLASS
6388 || class2 == X86_64_X87UP_CLASS
6389 || class2 == X86_64_COMPLEX_X87_CLASS)
6390 return X86_64_MEMORY_CLASS;
6392 /* Rule #6: Otherwise class SSE is used. */
6393 return X86_64_SSE_CLASS;
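/* Editorial sketch (not part of the original source): applying the rules
   above to  struct { double d; int i; }  on x86-64, the first eightbyte
   (the double) classifies as SSE and the second (the int) as integer
   class, so the struct is passed in one SSE register and one
   general-purpose register; a struct containing a long double instead
   gets the X87/X87UP classes and is passed entirely in memory.  */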
6396 /* Classify the argument of type TYPE and mode MODE.
6397 CLASSES will be filled by the register class used to pass each word
6398 of the operand. The number of words is returned. In case the parameter
6399 should be passed in memory, 0 is returned. As a special case for zero
6400 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6402 BIT_OFFSET is used internally for handling records and specifies the
6403 offset of the field, in bits modulo 512, to avoid overflow cases.
6405 See the x86-64 PS ABI for details.
6408 static int
6409 classify_argument (enum machine_mode mode, const_tree type,
6410 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6412 HOST_WIDE_INT bytes =
6413 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6414 int words
6415 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6417 /* Variable sized entities are always passed/returned in memory. */
6418 if (bytes < 0)
6419 return 0;
6421 if (mode != VOIDmode
6422 && targetm.calls.must_pass_in_stack (mode, type))
6423 return 0;
6425 if (type && AGGREGATE_TYPE_P (type))
6427 int i;
6428 tree field;
6429 enum x86_64_reg_class subclasses[MAX_CLASSES];
6431 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6432 if (bytes > 64)
6433 return 0;
6435 for (i = 0; i < words; i++)
6436 classes[i] = X86_64_NO_CLASS;
6438 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6439 signal the memory class, so handle this as a special case. */
6440 if (!words)
6442 classes[0] = X86_64_NO_CLASS;
6443 return 1;
6446 /* Classify each field of record and merge classes. */
6447 switch (TREE_CODE (type))
6449 case RECORD_TYPE:
6450 /* And now merge the fields of structure. */
6451 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6453 if (TREE_CODE (field) == FIELD_DECL)
6455 int num;
6457 if (TREE_TYPE (field) == error_mark_node)
6458 continue;
6460 /* Bitfields are always classified as integer. Handle them
6461 early, since later code would consider them to be
6462 misaligned integers. */
6463 if (DECL_BIT_FIELD (field))
6465 for (i = (int_bit_position (field)
6466 + (bit_offset % 64)) / 8 / 8;
6467 i < ((int_bit_position (field) + (bit_offset % 64))
6468 + tree_to_shwi (DECL_SIZE (field))
6469 + 63) / 8 / 8; i++)
6470 classes[i] =
6471 merge_classes (X86_64_INTEGER_CLASS,
6472 classes[i]);
6474 else
6476 int pos;
6478 type = TREE_TYPE (field);
6480 /* Flexible array member is ignored. */
6481 if (TYPE_MODE (type) == BLKmode
6482 && TREE_CODE (type) == ARRAY_TYPE
6483 && TYPE_SIZE (type) == NULL_TREE
6484 && TYPE_DOMAIN (type) != NULL_TREE
6485 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6486 == NULL_TREE))
6488 static bool warned;
6490 if (!warned && warn_psabi)
6492 warned = true;
6493 inform (input_location,
6494 "the ABI of passing struct with"
6495 " a flexible array member has"
6496 " changed in GCC 4.4");
6498 continue;
6500 num = classify_argument (TYPE_MODE (type), type,
6501 subclasses,
6502 (int_bit_position (field)
6503 + bit_offset) % 512);
6504 if (!num)
6505 return 0;
6506 pos = (int_bit_position (field)
6507 + (bit_offset % 64)) / 8 / 8;
6508 for (i = 0; i < num && (i + pos) < words; i++)
6509 classes[i + pos] =
6510 merge_classes (subclasses[i], classes[i + pos]);
6514 break;
6516 case ARRAY_TYPE:
6517 /* Arrays are handled as small records. */
6519 int num;
6520 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6521 TREE_TYPE (type), subclasses, bit_offset);
6522 if (!num)
6523 return 0;
6525 /* The partial classes are now full classes. */
6526 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6527 subclasses[0] = X86_64_SSE_CLASS;
6528 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6529 && !((bit_offset % 64) == 0 && bytes == 4))
6530 subclasses[0] = X86_64_INTEGER_CLASS;
6532 for (i = 0; i < words; i++)
6533 classes[i] = subclasses[i % num];
6535 break;
6537 case UNION_TYPE:
6538 case QUAL_UNION_TYPE:
6539 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6541 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6543 if (TREE_CODE (field) == FIELD_DECL)
6545 int num;
6547 if (TREE_TYPE (field) == error_mark_node)
6548 continue;
6550 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6551 TREE_TYPE (field), subclasses,
6552 bit_offset);
6553 if (!num)
6554 return 0;
6555 for (i = 0; i < num; i++)
6556 classes[i] = merge_classes (subclasses[i], classes[i]);
6559 break;
6561 default:
6562 gcc_unreachable ();
6565 if (words > 2)
6567 /* When size > 16 bytes, if the first one isn't
6568 X86_64_SSE_CLASS or any of the others isn't
6569 X86_64_SSEUP_CLASS, everything should be passed in
6570 memory. */
6571 if (classes[0] != X86_64_SSE_CLASS)
6572 return 0;
6574 for (i = 1; i < words; i++)
6575 if (classes[i] != X86_64_SSEUP_CLASS)
6576 return 0;
6579 /* Final merger cleanup. */
6580 for (i = 0; i < words; i++)
6582 /* If one class is MEMORY, everything should be passed in
6583 memory. */
6584 if (classes[i] == X86_64_MEMORY_CLASS)
6585 return 0;
6587 /* X86_64_SSEUP_CLASS should always be preceded by
6588 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6589 if (classes[i] == X86_64_SSEUP_CLASS
6590 && classes[i - 1] != X86_64_SSE_CLASS
6591 && classes[i - 1] != X86_64_SSEUP_CLASS)
6593 /* The first one should never be X86_64_SSEUP_CLASS. */
6594 gcc_assert (i != 0);
6595 classes[i] = X86_64_SSE_CLASS;
6598 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6599 everything should be passed in memory. */
6600 if (classes[i] == X86_64_X87UP_CLASS
6601 && (classes[i - 1] != X86_64_X87_CLASS))
6603 static bool warned;
6605 /* The first one should never be X86_64_X87UP_CLASS. */
6606 gcc_assert (i != 0);
6607 if (!warned && warn_psabi)
6609 warned = true;
6610 inform (input_location,
6611 "the ABI of passing union with long double"
6612 " has changed in GCC 4.4");
6614 return 0;
6617 return words;
6620 /* Compute the alignment needed. We align all types to their natural boundaries, with
6621 the exception of XFmode, which is aligned to 64 bits. */
6622 if (mode != VOIDmode && mode != BLKmode)
6624 int mode_alignment = GET_MODE_BITSIZE (mode);
6626 if (mode == XFmode)
6627 mode_alignment = 128;
6628 else if (mode == XCmode)
6629 mode_alignment = 256;
6630 if (COMPLEX_MODE_P (mode))
6631 mode_alignment /= 2;
6632 /* Misaligned fields are always returned in memory. */
6633 if (bit_offset % mode_alignment)
6634 return 0;
6637 /* for V1xx modes, just use the base mode */
6638 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6639 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6640 mode = GET_MODE_INNER (mode);
6642 /* Classification of atomic types. */
6643 switch (mode)
6645 case SDmode:
6646 case DDmode:
6647 classes[0] = X86_64_SSE_CLASS;
6648 return 1;
6649 case TDmode:
6650 classes[0] = X86_64_SSE_CLASS;
6651 classes[1] = X86_64_SSEUP_CLASS;
6652 return 2;
6653 case DImode:
6654 case SImode:
6655 case HImode:
6656 case QImode:
6657 case CSImode:
6658 case CHImode:
6659 case CQImode:
6661 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6663 /* Analyze last 128 bits only. */
6664 size = (size - 1) & 0x7f;
6666 if (size < 32)
6668 classes[0] = X86_64_INTEGERSI_CLASS;
6669 return 1;
6671 else if (size < 64)
6673 classes[0] = X86_64_INTEGER_CLASS;
6674 return 1;
6676 else if (size < 64+32)
6678 classes[0] = X86_64_INTEGER_CLASS;
6679 classes[1] = X86_64_INTEGERSI_CLASS;
6680 return 2;
6682 else if (size < 64+64)
6684 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6685 return 2;
6687 else
6688 gcc_unreachable ();
6690 case CDImode:
6691 case TImode:
6692 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6693 return 2;
6694 case COImode:
6695 case OImode:
6696 /* OImode shouldn't be used directly. */
6697 gcc_unreachable ();
6698 case CTImode:
6699 return 0;
6700 case SFmode:
6701 if (!(bit_offset % 64))
6702 classes[0] = X86_64_SSESF_CLASS;
6703 else
6704 classes[0] = X86_64_SSE_CLASS;
6705 return 1;
6706 case DFmode:
6707 classes[0] = X86_64_SSEDF_CLASS;
6708 return 1;
6709 case XFmode:
6710 classes[0] = X86_64_X87_CLASS;
6711 classes[1] = X86_64_X87UP_CLASS;
6712 return 2;
6713 case TFmode:
6714 classes[0] = X86_64_SSE_CLASS;
6715 classes[1] = X86_64_SSEUP_CLASS;
6716 return 2;
6717 case SCmode:
6718 classes[0] = X86_64_SSE_CLASS;
6719 if (!(bit_offset % 64))
6720 return 1;
6721 else
6723 static bool warned;
6725 if (!warned && warn_psabi)
6727 warned = true;
6728 inform (input_location,
6729 "the ABI of passing structure with complex float"
6730 " member has changed in GCC 4.4");
6732 classes[1] = X86_64_SSESF_CLASS;
6733 return 2;
6735 case DCmode:
6736 classes[0] = X86_64_SSEDF_CLASS;
6737 classes[1] = X86_64_SSEDF_CLASS;
6738 return 2;
6739 case XCmode:
6740 classes[0] = X86_64_COMPLEX_X87_CLASS;
6741 return 1;
6742 case TCmode:
6743 /* This mode is larger than 16 bytes. */
6744 return 0;
6745 case V8SFmode:
6746 case V8SImode:
6747 case V32QImode:
6748 case V16HImode:
6749 case V4DFmode:
6750 case V4DImode:
6751 classes[0] = X86_64_SSE_CLASS;
6752 classes[1] = X86_64_SSEUP_CLASS;
6753 classes[2] = X86_64_SSEUP_CLASS;
6754 classes[3] = X86_64_SSEUP_CLASS;
6755 return 4;
6756 case V8DFmode:
6757 case V16SFmode:
6758 case V8DImode:
6759 case V16SImode:
6760 case V32HImode:
6761 case V64QImode:
6762 classes[0] = X86_64_SSE_CLASS;
6763 classes[1] = X86_64_SSEUP_CLASS;
6764 classes[2] = X86_64_SSEUP_CLASS;
6765 classes[3] = X86_64_SSEUP_CLASS;
6766 classes[4] = X86_64_SSEUP_CLASS;
6767 classes[5] = X86_64_SSEUP_CLASS;
6768 classes[6] = X86_64_SSEUP_CLASS;
6769 classes[7] = X86_64_SSEUP_CLASS;
6770 return 8;
6771 case V4SFmode:
6772 case V4SImode:
6773 case V16QImode:
6774 case V8HImode:
6775 case V2DFmode:
6776 case V2DImode:
6777 classes[0] = X86_64_SSE_CLASS;
6778 classes[1] = X86_64_SSEUP_CLASS;
6779 return 2;
6780 case V1TImode:
6781 case V1DImode:
6782 case V2SFmode:
6783 case V2SImode:
6784 case V4HImode:
6785 case V8QImode:
6786 classes[0] = X86_64_SSE_CLASS;
6787 return 1;
6788 case BLKmode:
6789 case VOIDmode:
6790 return 0;
6791 default:
6792 gcc_assert (VECTOR_MODE_P (mode));
6794 if (bytes > 16)
6795 return 0;
6797 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6799 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6800 classes[0] = X86_64_INTEGERSI_CLASS;
6801 else
6802 classes[0] = X86_64_INTEGER_CLASS;
6803 classes[1] = X86_64_INTEGER_CLASS;
6804 return 1 + (bytes > 8);
6808 /* Examine the argument and set the number of registers required in each
6809 class. Return 0 iff the parameter should be passed in memory. */
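/* For the struct { double d; int i; } example above, examine_argument
   sets *int_nregs to 1 and *sse_nregs to 1 and returns nonzero, while an
   aggregate larger than two eightbytes that does not start with an SSE
   class is rejected (classify_argument returns 0) and therefore passed
   in memory.  */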
6810 static int
6811 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6812 int *int_nregs, int *sse_nregs)
6814 enum x86_64_reg_class regclass[MAX_CLASSES];
6815 int n = classify_argument (mode, type, regclass, 0);
6817 *int_nregs = 0;
6818 *sse_nregs = 0;
6819 if (!n)
6820 return 0;
6821 for (n--; n >= 0; n--)
6822 switch (regclass[n])
6824 case X86_64_INTEGER_CLASS:
6825 case X86_64_INTEGERSI_CLASS:
6826 (*int_nregs)++;
6827 break;
6828 case X86_64_SSE_CLASS:
6829 case X86_64_SSESF_CLASS:
6830 case X86_64_SSEDF_CLASS:
6831 (*sse_nregs)++;
6832 break;
6833 case X86_64_NO_CLASS:
6834 case X86_64_SSEUP_CLASS:
6835 break;
6836 case X86_64_X87_CLASS:
6837 case X86_64_X87UP_CLASS:
6838 if (!in_return)
6839 return 0;
6840 break;
6841 case X86_64_COMPLEX_X87_CLASS:
6842 return in_return ? 2 : 0;
6843 case X86_64_MEMORY_CLASS:
6844 gcc_unreachable ();
6846 return 1;
6849 /* Construct container for the argument used by GCC interface. See
6850 FUNCTION_ARG for the detailed description. */
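/* Continuing the struct { double d; int i; } example, the container
   built here is a PARALLEL with two EXPR_LIST entries: a DFmode SSE
   register at byte offset 0 and an SImode integer register at byte
   offset 8.  Callers then move each piece to or from the right
   location.  */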
6852 static rtx
6853 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6854 const_tree type, int in_return, int nintregs, int nsseregs,
6855 const int *intreg, int sse_regno)
6857 /* The following variables hold the static issued_error state. */
6858 static bool issued_sse_arg_error;
6859 static bool issued_sse_ret_error;
6860 static bool issued_x87_ret_error;
6862 enum machine_mode tmpmode;
6863 int bytes =
6864 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6865 enum x86_64_reg_class regclass[MAX_CLASSES];
6866 int n;
6867 int i;
6868 int nexps = 0;
6869 int needed_sseregs, needed_intregs;
6870 rtx exp[MAX_CLASSES];
6871 rtx ret;
6873 n = classify_argument (mode, type, regclass, 0);
6874 if (!n)
6875 return NULL;
6876 if (!examine_argument (mode, type, in_return, &needed_intregs,
6877 &needed_sseregs))
6878 return NULL;
6879 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6880 return NULL;
6882 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6883 some less clueful developer tries to use floating-point anyway. */
6884 if (needed_sseregs && !TARGET_SSE)
6886 if (in_return)
6888 if (!issued_sse_ret_error)
6890 error ("SSE register return with SSE disabled");
6891 issued_sse_ret_error = true;
6894 else if (!issued_sse_arg_error)
6896 error ("SSE register argument with SSE disabled");
6897 issued_sse_arg_error = true;
6899 return NULL;
6902 /* Likewise, error if the ABI requires us to return values in the
6903 x87 registers and the user specified -mno-80387. */
6904 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6905 for (i = 0; i < n; i++)
6906 if (regclass[i] == X86_64_X87_CLASS
6907 || regclass[i] == X86_64_X87UP_CLASS
6908 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6910 if (!issued_x87_ret_error)
6912 error ("x87 register return with x87 disabled");
6913 issued_x87_ret_error = true;
6915 return NULL;
6918 /* First construct simple cases. Avoid SCmode, since we want to use
6919 single register to pass this type. */
6920 if (n == 1 && mode != SCmode)
6921 switch (regclass[0])
6923 case X86_64_INTEGER_CLASS:
6924 case X86_64_INTEGERSI_CLASS:
6925 return gen_rtx_REG (mode, intreg[0]);
6926 case X86_64_SSE_CLASS:
6927 case X86_64_SSESF_CLASS:
6928 case X86_64_SSEDF_CLASS:
6929 if (mode != BLKmode)
6930 return gen_reg_or_parallel (mode, orig_mode,
6931 SSE_REGNO (sse_regno));
6932 break;
6933 case X86_64_X87_CLASS:
6934 case X86_64_COMPLEX_X87_CLASS:
6935 return gen_rtx_REG (mode, FIRST_STACK_REG);
6936 case X86_64_NO_CLASS:
6937 /* Zero sized array, struct or class. */
6938 return NULL;
6939 default:
6940 gcc_unreachable ();
6942 if (n == 2
6943 && regclass[0] == X86_64_SSE_CLASS
6944 && regclass[1] == X86_64_SSEUP_CLASS
6945 && mode != BLKmode)
6946 return gen_reg_or_parallel (mode, orig_mode,
6947 SSE_REGNO (sse_regno));
6948 if (n == 4
6949 && regclass[0] == X86_64_SSE_CLASS
6950 && regclass[1] == X86_64_SSEUP_CLASS
6951 && regclass[2] == X86_64_SSEUP_CLASS
6952 && regclass[3] == X86_64_SSEUP_CLASS
6953 && mode != BLKmode)
6954 return gen_reg_or_parallel (mode, orig_mode,
6955 SSE_REGNO (sse_regno));
6956 if (n == 8
6957 && regclass[0] == X86_64_SSE_CLASS
6958 && regclass[1] == X86_64_SSEUP_CLASS
6959 && regclass[2] == X86_64_SSEUP_CLASS
6960 && regclass[3] == X86_64_SSEUP_CLASS
6961 && regclass[4] == X86_64_SSEUP_CLASS
6962 && regclass[5] == X86_64_SSEUP_CLASS
6963 && regclass[6] == X86_64_SSEUP_CLASS
6964 && regclass[7] == X86_64_SSEUP_CLASS
6965 && mode != BLKmode)
6966 return gen_reg_or_parallel (mode, orig_mode,
6967 SSE_REGNO (sse_regno));
6968 if (n == 2
6969 && regclass[0] == X86_64_X87_CLASS
6970 && regclass[1] == X86_64_X87UP_CLASS)
6971 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6973 if (n == 2
6974 && regclass[0] == X86_64_INTEGER_CLASS
6975 && regclass[1] == X86_64_INTEGER_CLASS
6976 && (mode == CDImode || mode == TImode)
6977 && intreg[0] + 1 == intreg[1])
6978 return gen_rtx_REG (mode, intreg[0]);
6980 /* Otherwise figure out the entries of the PARALLEL. */
6981 for (i = 0; i < n; i++)
6983 int pos;
6985 switch (regclass[i])
6987 case X86_64_NO_CLASS:
6988 break;
6989 case X86_64_INTEGER_CLASS:
6990 case X86_64_INTEGERSI_CLASS:
6991 /* Merge TImodes on aligned occasions here too. */
6992 if (i * 8 + 8 > bytes)
6993 tmpmode
6994 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6995 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6996 tmpmode = SImode;
6997 else
6998 tmpmode = DImode;
6999 /* We've requested 24 bytes for which we
7000 don't have a mode. Use DImode. */
7001 if (tmpmode == BLKmode)
7002 tmpmode = DImode;
7003 exp [nexps++]
7004 = gen_rtx_EXPR_LIST (VOIDmode,
7005 gen_rtx_REG (tmpmode, *intreg),
7006 GEN_INT (i*8));
7007 intreg++;
7008 break;
7009 case X86_64_SSESF_CLASS:
7010 exp [nexps++]
7011 = gen_rtx_EXPR_LIST (VOIDmode,
7012 gen_rtx_REG (SFmode,
7013 SSE_REGNO (sse_regno)),
7014 GEN_INT (i*8));
7015 sse_regno++;
7016 break;
7017 case X86_64_SSEDF_CLASS:
7018 exp [nexps++]
7019 = gen_rtx_EXPR_LIST (VOIDmode,
7020 gen_rtx_REG (DFmode,
7021 SSE_REGNO (sse_regno)),
7022 GEN_INT (i*8));
7023 sse_regno++;
7024 break;
7025 case X86_64_SSE_CLASS:
7026 pos = i;
7027 switch (n)
7029 case 1:
7030 tmpmode = DImode;
7031 break;
7032 case 2:
7033 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7035 tmpmode = TImode;
7036 i++;
7038 else
7039 tmpmode = DImode;
7040 break;
7041 case 4:
7042 gcc_assert (i == 0
7043 && regclass[1] == X86_64_SSEUP_CLASS
7044 && regclass[2] == X86_64_SSEUP_CLASS
7045 && regclass[3] == X86_64_SSEUP_CLASS);
7046 tmpmode = OImode;
7047 i += 3;
7048 break;
7049 case 8:
7050 gcc_assert (i == 0
7051 && regclass[1] == X86_64_SSEUP_CLASS
7052 && regclass[2] == X86_64_SSEUP_CLASS
7053 && regclass[3] == X86_64_SSEUP_CLASS
7054 && regclass[4] == X86_64_SSEUP_CLASS
7055 && regclass[5] == X86_64_SSEUP_CLASS
7056 && regclass[6] == X86_64_SSEUP_CLASS
7057 && regclass[7] == X86_64_SSEUP_CLASS);
7058 tmpmode = XImode;
7059 i += 7;
7060 break;
7061 default:
7062 gcc_unreachable ();
7064 exp [nexps++]
7065 = gen_rtx_EXPR_LIST (VOIDmode,
7066 gen_rtx_REG (tmpmode,
7067 SSE_REGNO (sse_regno)),
7068 GEN_INT (pos*8));
7069 sse_regno++;
7070 break;
7071 default:
7072 gcc_unreachable ();
7076 /* Empty aligned struct, union or class. */
7077 if (nexps == 0)
7078 return NULL;
7080 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7081 for (i = 0; i < nexps; i++)
7082 XVECEXP (ret, 0, i) = exp [i];
7083 return ret;
7086 /* Update the data in CUM to advance over an argument of mode MODE
7087 and data type TYPE. (TYPE is null for libcalls where that information
7088 may not be available.) */
7090 static void
7091 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7092 const_tree type, HOST_WIDE_INT bytes,
7093 HOST_WIDE_INT words)
7095 switch (mode)
7097 default:
7098 break;
7100 case BLKmode:
7101 if (bytes < 0)
7102 break;
7103 /* FALLTHRU */
7105 case DImode:
7106 case SImode:
7107 case HImode:
7108 case QImode:
7109 cum->words += words;
7110 cum->nregs -= words;
7111 cum->regno += words;
7113 if (cum->nregs <= 0)
7115 cum->nregs = 0;
7116 cum->regno = 0;
7118 break;
7120 case OImode:
7121 /* OImode shouldn't be used directly. */
7122 gcc_unreachable ();
7124 case DFmode:
7125 if (cum->float_in_sse < 2)
7126 break;
7127 case SFmode:
7128 if (cum->float_in_sse < 1)
7129 break;
7130 /* FALLTHRU */
7132 case V8SFmode:
7133 case V8SImode:
7134 case V64QImode:
7135 case V32HImode:
7136 case V16SImode:
7137 case V8DImode:
7138 case V16SFmode:
7139 case V8DFmode:
7140 case V32QImode:
7141 case V16HImode:
7142 case V4DFmode:
7143 case V4DImode:
7144 case TImode:
7145 case V16QImode:
7146 case V8HImode:
7147 case V4SImode:
7148 case V2DImode:
7149 case V4SFmode:
7150 case V2DFmode:
7151 if (!type || !AGGREGATE_TYPE_P (type))
7153 cum->sse_words += words;
7154 cum->sse_nregs -= 1;
7155 cum->sse_regno += 1;
7156 if (cum->sse_nregs <= 0)
7158 cum->sse_nregs = 0;
7159 cum->sse_regno = 0;
7162 break;
7164 case V8QImode:
7165 case V4HImode:
7166 case V2SImode:
7167 case V2SFmode:
7168 case V1TImode:
7169 case V1DImode:
7170 if (!type || !AGGREGATE_TYPE_P (type))
7172 cum->mmx_words += words;
7173 cum->mmx_nregs -= 1;
7174 cum->mmx_regno += 1;
7175 if (cum->mmx_nregs <= 0)
7177 cum->mmx_nregs = 0;
7178 cum->mmx_regno = 0;
7181 break;
7185 static void
7186 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7187 const_tree type, HOST_WIDE_INT words, bool named)
7189 int int_nregs, sse_nregs;
7191 /* Unnamed 512- and 256-bit vector mode parameters are passed on the stack. */
7192 if (!named && (VALID_AVX512F_REG_MODE (mode)
7193 || VALID_AVX256_REG_MODE (mode)))
7194 return;
7196 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7197 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7199 cum->nregs -= int_nregs;
7200 cum->sse_nregs -= sse_nregs;
7201 cum->regno += int_nregs;
7202 cum->sse_regno += sse_nregs;
7204 else
7206 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7207 cum->words = (cum->words + align - 1) & ~(align - 1);
7208 cum->words += words;
7212 static void
7213 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7214 HOST_WIDE_INT words)
7216 /* Otherwise, this should be passed indirectly. */
7217 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7219 cum->words += words;
7220 if (cum->nregs > 0)
7222 cum->nregs -= 1;
7223 cum->regno += 1;
7227 /* Update the data in CUM to advance over an argument of mode MODE and
7228 data type TYPE. (TYPE is null for libcalls where that information
7229 may not be available.) */
7231 static void
7232 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7233 const_tree type, bool named)
7235 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7236 HOST_WIDE_INT bytes, words;
7238 if (mode == BLKmode)
7239 bytes = int_size_in_bytes (type);
7240 else
7241 bytes = GET_MODE_SIZE (mode);
7242 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7244 if (type)
7245 mode = type_natural_mode (type, NULL, false);
7247 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7248 function_arg_advance_ms_64 (cum, bytes, words);
7249 else if (TARGET_64BIT)
7250 function_arg_advance_64 (cum, mode, type, words, named);
7251 else
7252 function_arg_advance_32 (cum, mode, type, bytes, words);
7255 /* Define where to put the arguments to a function.
7256 Value is zero to push the argument on the stack,
7257 or a hard register in which to store the argument.
7259 MODE is the argument's machine mode.
7260 TYPE is the data type of the argument (as a tree).
7261 This is null for libcalls where that information may
7262 not be available.
7263 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7264 the preceding args and about the function being called.
7265 NAMED is nonzero if this argument is a named parameter
7266 (otherwise it is an extra parameter matching an ellipsis). */
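/* A rough sketch of the 32-bit conventions handled below: by default
   integral arguments go on the stack; with regparm, up to cum->nregs of
   them go in registers (ECX and EDX first for fastcall); non-aggregate
   vector arguments go in SSE or MMX registers when the corresponding
   feature is enabled.  */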
7268 static rtx
7269 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7270 enum machine_mode orig_mode, const_tree type,
7271 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7273 /* Avoid the AL settings for the Unix64 ABI. */
7274 if (mode == VOIDmode)
7275 return constm1_rtx;
7277 switch (mode)
7279 default:
7280 break;
7282 case BLKmode:
7283 if (bytes < 0)
7284 break;
7285 /* FALLTHRU */
7286 case DImode:
7287 case SImode:
7288 case HImode:
7289 case QImode:
7290 if (words <= cum->nregs)
7292 int regno = cum->regno;
7294 /* Fastcall allocates the first two DWORD (SImode) or
7295 smaller arguments to ECX and EDX if they aren't
7296 aggregate types. */
7297 if (cum->fastcall)
7299 if (mode == BLKmode
7300 || mode == DImode
7301 || (type && AGGREGATE_TYPE_P (type)))
7302 break;
7304 /* ECX, not EAX, is the first allocated register. */
7305 if (regno == AX_REG)
7306 regno = CX_REG;
7308 return gen_rtx_REG (mode, regno);
7310 break;
7312 case DFmode:
7313 if (cum->float_in_sse < 2)
7314 break;
7315 case SFmode:
7316 if (cum->float_in_sse < 1)
7317 break;
7318 /* FALLTHRU */
7319 case TImode:
7320 /* In 32bit, we pass TImode in xmm registers. */
7321 case V16QImode:
7322 case V8HImode:
7323 case V4SImode:
7324 case V2DImode:
7325 case V4SFmode:
7326 case V2DFmode:
7327 if (!type || !AGGREGATE_TYPE_P (type))
7329 if (cum->sse_nregs)
7330 return gen_reg_or_parallel (mode, orig_mode,
7331 cum->sse_regno + FIRST_SSE_REG);
7333 break;
7335 case OImode:
7336 case XImode:
7337 /* OImode and XImode shouldn't be used directly. */
7338 gcc_unreachable ();
7340 case V64QImode:
7341 case V32HImode:
7342 case V16SImode:
7343 case V8DImode:
7344 case V16SFmode:
7345 case V8DFmode:
7346 case V8SFmode:
7347 case V8SImode:
7348 case V32QImode:
7349 case V16HImode:
7350 case V4DFmode:
7351 case V4DImode:
7352 if (!type || !AGGREGATE_TYPE_P (type))
7354 if (cum->sse_nregs)
7355 return gen_reg_or_parallel (mode, orig_mode,
7356 cum->sse_regno + FIRST_SSE_REG);
7358 break;
7360 case V8QImode:
7361 case V4HImode:
7362 case V2SImode:
7363 case V2SFmode:
7364 case V1TImode:
7365 case V1DImode:
7366 if (!type || !AGGREGATE_TYPE_P (type))
7368 if (cum->mmx_nregs)
7369 return gen_reg_or_parallel (mode, orig_mode,
7370 cum->mmx_regno + FIRST_MMX_REG);
7372 break;
7375 return NULL_RTX;
7378 static rtx
7379 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7380 enum machine_mode orig_mode, const_tree type, bool named)
7382 /* Handle a hidden AL argument containing the number of SSE registers
7383 used by varargs x86-64 functions. */
7384 if (mode == VOIDmode)
7385 return GEN_INT (cum->maybe_vaarg
7386 ? (cum->sse_nregs < 0
7387 ? X86_64_SSE_REGPARM_MAX
7388 : cum->sse_regno)
7389 : -1);
7391 switch (mode)
7393 default:
7394 break;
7396 case V8SFmode:
7397 case V8SImode:
7398 case V32QImode:
7399 case V16HImode:
7400 case V4DFmode:
7401 case V4DImode:
7402 case V16SFmode:
7403 case V16SImode:
7404 case V64QImode:
7405 case V32HImode:
7406 case V8DFmode:
7407 case V8DImode:
7408 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
7409 if (!named)
7410 return NULL;
7411 break;
7414 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7415 cum->sse_nregs,
7416 &x86_64_int_parameter_registers [cum->regno],
7417 cum->sse_regno);
7420 static rtx
7421 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7422 enum machine_mode orig_mode, bool named,
7423 HOST_WIDE_INT bytes)
7425 unsigned int regno;
7427 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7428 We use the value -2 to specify that the current function call is MS ABI. */
7429 if (mode == VOIDmode)
7430 return GEN_INT (-2);
7432 /* If we've run out of registers, it goes on the stack. */
7433 if (cum->nregs == 0)
7434 return NULL_RTX;
7436 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7438 /* Only floating point modes are passed in anything but integer regs. */
7439 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7441 if (named)
7442 regno = cum->regno + FIRST_SSE_REG;
7443 else
7445 rtx t1, t2;
7447 /* Unnamed floating parameters are passed in both the
7448 SSE and integer registers. */
7449 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7450 t2 = gen_rtx_REG (mode, regno);
7451 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7452 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7453 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7456 /* Handle aggregate types passed in registers. */
7457 if (orig_mode == BLKmode)
7459 if (bytes > 0 && bytes <= 8)
7460 mode = (bytes > 4 ? DImode : SImode);
7461 if (mode == BLKmode)
7462 mode = DImode;
7465 return gen_reg_or_parallel (mode, orig_mode, regno);
7468 /* Return where to put the arguments to a function.
7469 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7471 MODE is the argument's machine mode. TYPE is the data type of the
7472 argument. It is null for libcalls where that information may not be
7473 available. CUM gives information about the preceding args and about
7474 the function being called. NAMED is nonzero if this argument is a
7475 named parameter (otherwise it is an extra parameter matching an
7476 ellipsis). */
7478 static rtx
7479 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7480 const_tree type, bool named)
7482 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7483 enum machine_mode mode = omode;
7484 HOST_WIDE_INT bytes, words;
7485 rtx arg;
7487 if (mode == BLKmode)
7488 bytes = int_size_in_bytes (type);
7489 else
7490 bytes = GET_MODE_SIZE (mode);
7491 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7493 /* To simplify the code below, represent vector types with a vector mode
7494 even if MMX/SSE are not active. */
7495 if (type && TREE_CODE (type) == VECTOR_TYPE)
7496 mode = type_natural_mode (type, cum, false);
7498 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7499 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7500 else if (TARGET_64BIT)
7501 arg = function_arg_64 (cum, mode, omode, type, named);
7502 else
7503 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7505 return arg;
7508 /* A C expression that indicates when an argument must be passed by
7509 reference. If nonzero for an argument, a copy of that argument is
7510 made in memory and a pointer to the argument is passed instead of
7511 the argument itself. The pointer is passed in whatever way is
7512 appropriate for passing a pointer to that type. */
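/* Under the MS_ABI rules checked below, for example, __m128 (16 bytes)
   and any aggregate whose size is not 1, 2, 4 or 8 bytes are passed by
   reference, while a plain 8-byte struct is passed by value in a
   register.  */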
7514 static bool
7515 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7516 const_tree type, bool named ATTRIBUTE_UNUSED)
7518 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7520 /* See Windows x64 Software Convention. */
7521 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7523 int msize = (int) GET_MODE_SIZE (mode);
7524 if (type)
7526 /* Arrays are passed by reference. */
7527 if (TREE_CODE (type) == ARRAY_TYPE)
7528 return true;
7530 if (AGGREGATE_TYPE_P (type))
7532 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7533 are passed by reference. */
7534 msize = int_size_in_bytes (type);
7538 /* __m128 is passed by reference. */
7539 switch (msize) {
7540 case 1: case 2: case 4: case 8:
7541 break;
7542 default:
7543 return true;
7546 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7547 return 1;
7549 return 0;
7552 /* Return true when TYPE should be 128bit aligned for 32bit argument
7553 passing ABI. XXX: This function is obsolete and is only used for
7554 checking psABI compatibility with previous versions of GCC. */
7556 static bool
7557 ix86_compat_aligned_value_p (const_tree type)
7559 enum machine_mode mode = TYPE_MODE (type);
7560 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7561 || mode == TDmode
7562 || mode == TFmode
7563 || mode == TCmode)
7564 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7565 return true;
7566 if (TYPE_ALIGN (type) < 128)
7567 return false;
7569 if (AGGREGATE_TYPE_P (type))
7571 /* Walk the aggregates recursively. */
7572 switch (TREE_CODE (type))
7574 case RECORD_TYPE:
7575 case UNION_TYPE:
7576 case QUAL_UNION_TYPE:
7578 tree field;
7580 /* Walk all the structure fields. */
7581 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7583 if (TREE_CODE (field) == FIELD_DECL
7584 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7585 return true;
7587 break;
7590 case ARRAY_TYPE:
7591 /* Just for use if some language passes arrays by value. */
7592 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7593 return true;
7594 break;
7596 default:
7597 gcc_unreachable ();
7600 return false;
7603 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7604 XXX: This function is obsolete and is only used for checking psABI
7605 compatibility with previous versions of GCC. */
7607 static unsigned int
7608 ix86_compat_function_arg_boundary (enum machine_mode mode,
7609 const_tree type, unsigned int align)
7611 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7612 natural boundaries. */
7613 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7615 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7616 make an exception for SSE modes since these require 128bit
7617 alignment.
7619 The handling here differs from field_alignment. ICC aligns MMX
7620 arguments to 4 byte boundaries, while structure fields are aligned
7621 to 8 byte boundaries. */
7622 if (!type)
7624 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7625 align = PARM_BOUNDARY;
7627 else
7629 if (!ix86_compat_aligned_value_p (type))
7630 align = PARM_BOUNDARY;
7633 if (align > BIGGEST_ALIGNMENT)
7634 align = BIGGEST_ALIGNMENT;
7635 return align;
7638 /* Return true when TYPE should be 128bit aligned for 32bit argument
7639 passing ABI. */
7641 static bool
7642 ix86_contains_aligned_value_p (const_tree type)
7644 enum machine_mode mode = TYPE_MODE (type);
7646 if (mode == XFmode || mode == XCmode)
7647 return false;
7649 if (TYPE_ALIGN (type) < 128)
7650 return false;
7652 if (AGGREGATE_TYPE_P (type))
7654 /* Walk the aggregates recursively. */
7655 switch (TREE_CODE (type))
7657 case RECORD_TYPE:
7658 case UNION_TYPE:
7659 case QUAL_UNION_TYPE:
7661 tree field;
7663 /* Walk all the structure fields. */
7664 for (field = TYPE_FIELDS (type);
7665 field;
7666 field = DECL_CHAIN (field))
7668 if (TREE_CODE (field) == FIELD_DECL
7669 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7670 return true;
7672 break;
7675 case ARRAY_TYPE:
7676 /* Just for use if some language passes arrays by value. */
7677 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7678 return true;
7679 break;
7681 default:
7682 gcc_unreachable ();
7685 else
7686 return TYPE_ALIGN (type) >= 128;
7688 return false;
7691 /* Gives the alignment boundary, in bits, of an argument with the
7692 specified mode and type. */
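/* For instance, on 32-bit targets a plain int argument gets the
   PARM_BOUNDARY alignment, while a type that contains a 128-bit aligned
   value (e.g. __m128) is aligned to 128 bits, as computed below.  */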
7694 static unsigned int
7695 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7697 unsigned int align;
7698 if (type)
7700 /* Since the main variant type is used for the call, convert the
7701 type to its main variant. */
7702 type = TYPE_MAIN_VARIANT (type);
7703 align = TYPE_ALIGN (type);
7705 else
7706 align = GET_MODE_ALIGNMENT (mode);
7707 if (align < PARM_BOUNDARY)
7708 align = PARM_BOUNDARY;
7709 else
7711 static bool warned;
7712 unsigned int saved_align = align;
7714 if (!TARGET_64BIT)
7716 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7717 if (!type)
7719 if (mode == XFmode || mode == XCmode)
7720 align = PARM_BOUNDARY;
7722 else if (!ix86_contains_aligned_value_p (type))
7723 align = PARM_BOUNDARY;
7725 if (align < 128)
7726 align = PARM_BOUNDARY;
7729 if (warn_psabi
7730 && !warned
7731 && align != ix86_compat_function_arg_boundary (mode, type,
7732 saved_align))
7734 warned = true;
7735 inform (input_location,
7736 "The ABI for passing parameters with %d-byte"
7737 " alignment has changed in GCC 4.6",
7738 align / BITS_PER_UNIT);
7742 return align;
7745 /* Return true if N is a possible register number of function value. */
7747 static bool
7748 ix86_function_value_regno_p (const unsigned int regno)
7750 switch (regno)
7752 case AX_REG:
7753 case DX_REG:
7754 return true;
7755 case DI_REG:
7756 case SI_REG:
7757 return TARGET_64BIT && ix86_abi != MS_ABI;
7759 /* Complex values are returned in %st(0)/%st(1) pair. */
7760 case ST0_REG:
7761 case ST1_REG:
7762 /* TODO: The function should depend on current function ABI but
7763 builtins.c would need updating then. Therefore we use the
7764 default ABI. */
7765 if (TARGET_64BIT && ix86_abi == MS_ABI)
7766 return false;
7767 return TARGET_FLOAT_RETURNS_IN_80387;
7769 /* Complex values are returned in %xmm0/%xmm1 pair. */
7770 case XMM0_REG:
7771 case XMM1_REG:
7772 return TARGET_SSE;
7774 case MM0_REG:
7775 if (TARGET_MACHO || TARGET_64BIT)
7776 return false;
7777 return TARGET_MMX;
7780 return false;
7783 /* Define how to find the value returned by a function.
7784 VALTYPE is the data type of the value (as a tree).
7785 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7786 otherwise, FUNC is 0. */
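/* On 32-bit targets the common cases handled below are: integers and
   pointers returned in %eax, floating point in %st(0) (unless SSE math
   or the sseregparm attribute overrides it to %xmm0), 8-byte vectors in
   %mm0, and 16-byte and wider vectors in the first SSE register.  */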
7788 static rtx
7789 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7790 const_tree fntype, const_tree fn)
7792 unsigned int regno;
7794 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7795 we normally prevent this case when mmx is not available. However
7796 some ABIs may require the result to be returned like DImode. */
7797 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7798 regno = FIRST_MMX_REG;
7800 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7801 we prevent this case when sse is not available. However some ABIs
7802 may require the result to be returned like integer TImode. */
7803 else if (mode == TImode
7804 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7805 regno = FIRST_SSE_REG;
7807 /* 32-byte vector modes in %ymm0. */
7808 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7809 regno = FIRST_SSE_REG;
7811 /* 64-byte vector modes in %zmm0. */
7812 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7813 regno = FIRST_SSE_REG;
7815 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7816 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7817 regno = FIRST_FLOAT_REG;
7818 else
7819 /* Most things go in %eax. */
7820 regno = AX_REG;
7822 /* Override FP return register with %xmm0 for local functions when
7823 SSE math is enabled or for functions with sseregparm attribute. */
7824 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7826 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7827 if ((sse_level >= 1 && mode == SFmode)
7828 || (sse_level == 2 && mode == DFmode))
7829 regno = FIRST_SSE_REG;
7832 /* OImode shouldn't be used directly. */
7833 gcc_assert (mode != OImode);
7835 return gen_rtx_REG (orig_mode, regno);
7838 static rtx
7839 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7840 const_tree valtype)
7842 rtx ret;
7844 /* Handle libcalls, which don't provide a type node. */
7845 if (valtype == NULL)
7847 unsigned int regno;
7849 switch (mode)
7851 case SFmode:
7852 case SCmode:
7853 case DFmode:
7854 case DCmode:
7855 case TFmode:
7856 case SDmode:
7857 case DDmode:
7858 case TDmode:
7859 regno = FIRST_SSE_REG;
7860 break;
7861 case XFmode:
7862 case XCmode:
7863 regno = FIRST_FLOAT_REG;
7864 break;
7865 case TCmode:
7866 return NULL;
7867 default:
7868 regno = AX_REG;
7871 return gen_rtx_REG (mode, regno);
7873 else if (POINTER_TYPE_P (valtype))
7875 /* Pointers are always returned in word_mode. */
7876 mode = word_mode;
7879 ret = construct_container (mode, orig_mode, valtype, 1,
7880 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7881 x86_64_int_return_registers, 0);
7883 /* For zero sized structures, construct_container returns NULL, but we
7884 need to keep the rest of the compiler happy by returning a meaningful value. */
7885 if (!ret)
7886 ret = gen_rtx_REG (orig_mode, AX_REG);
7888 return ret;
7891 static rtx
7892 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7893 const_tree valtype)
7895 unsigned int regno = AX_REG;
7897 if (TARGET_SSE)
7899 switch (GET_MODE_SIZE (mode))
7901 case 16:
7902 if (valtype != NULL_TREE
7903 && !VECTOR_INTEGER_TYPE_P (valtype)
7905 && !INTEGRAL_TYPE_P (valtype)
7906 && !VECTOR_FLOAT_TYPE_P (valtype))
7907 break;
7908 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7909 && !COMPLEX_MODE_P (mode))
7910 regno = FIRST_SSE_REG;
7911 break;
7912 case 8:
7913 case 4:
7914 if (mode == SFmode || mode == DFmode)
7915 regno = FIRST_SSE_REG;
7916 break;
7917 default:
7918 break;
7921 return gen_rtx_REG (orig_mode, regno);
7924 static rtx
7925 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7926 enum machine_mode orig_mode, enum machine_mode mode)
7928 const_tree fn, fntype;
7930 fn = NULL_TREE;
7931 if (fntype_or_decl && DECL_P (fntype_or_decl))
7932 fn = fntype_or_decl;
7933 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7935 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7936 return function_value_ms_64 (orig_mode, mode, valtype);
7937 else if (TARGET_64BIT)
7938 return function_value_64 (orig_mode, mode, valtype);
7939 else
7940 return function_value_32 (orig_mode, mode, fntype, fn);
7943 static rtx
7944 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7945 bool outgoing ATTRIBUTE_UNUSED)
7947 enum machine_mode mode, orig_mode;
7949 orig_mode = TYPE_MODE (valtype);
7950 mode = type_natural_mode (valtype, NULL, true);
7951 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7954 /* Pointer function arguments and return values are promoted to
7955 word_mode. */
7957 static enum machine_mode
7958 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7959 int *punsignedp, const_tree fntype,
7960 int for_return)
7962 if (type != NULL_TREE && POINTER_TYPE_P (type))
7964 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7965 return word_mode;
7967 return default_promote_function_mode (type, mode, punsignedp, fntype,
7968 for_return);
7971 /* Return true if a structure, union or array with MODE containing FIELD
7972 should be accessed using BLKmode. */
7974 static bool
7975 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
7977 /* Union with XFmode must be in BLKmode. */
7978 return (mode == XFmode
7979 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
7980 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
7984 ix86_libcall_value (enum machine_mode mode)
7986 return ix86_function_value_1 (NULL, NULL, mode, mode);
7989 /* Return true iff type is returned in memory. */
7991 static bool ATTRIBUTE_UNUSED
7992 return_in_memory_32 (const_tree type, enum machine_mode mode)
7994 HOST_WIDE_INT size;
7996 if (mode == BLKmode)
7997 return true;
7999 size = int_size_in_bytes (type);
8001 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8002 return false;
8004 if (VECTOR_MODE_P (mode) || mode == TImode)
8006 /* User-created vectors small enough to fit in EAX. */
8007 if (size < 8)
8008 return false;
8010 /* MMX/3dNow values are returned in MM0,
8011 except when it doesn't exist or the ABI prescribes otherwise. */
8012 if (size == 8)
8013 return !TARGET_MMX || TARGET_VECT8_RETURNS;
8015 /* SSE values are returned in XMM0, except when it doesn't exist. */
8016 if (size == 16)
8017 return !TARGET_SSE;
8019 /* AVX values are returned in YMM0, except when it doesn't exist. */
8020 if (size == 32)
8021 return !TARGET_AVX;
8023 /* AVX512F values are returned in ZMM0, except when it doesn't exist. */
8024 if (size == 64)
8025 return !TARGET_AVX512F;
8028 if (mode == XFmode)
8029 return false;
8031 if (size > 12)
8032 return true;
8034 /* OImode shouldn't be used directly. */
8035 gcc_assert (mode != OImode);
8037 return false;
8040 static bool ATTRIBUTE_UNUSED
8041 return_in_memory_64 (const_tree type, enum machine_mode mode)
8043 int needed_intregs, needed_sseregs;
8044 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
8047 static bool ATTRIBUTE_UNUSED
8048 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
8050 HOST_WIDE_INT size = int_size_in_bytes (type);
8052 /* __m128 is returned in xmm0. */
8053 if ((!type || VECTOR_INTEGER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
8054 || VECTOR_FLOAT_TYPE_P (type))
8055 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8056 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
8057 return false;
8059 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
8060 return size != 1 && size != 2 && size != 4 && size != 8;
8063 static bool
8064 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8066 #ifdef SUBTARGET_RETURN_IN_MEMORY
8067 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8068 #else
8069 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8071 if (TARGET_64BIT)
8073 if (ix86_function_type_abi (fntype) == MS_ABI)
8074 return return_in_memory_ms_64 (type, mode);
8075 else
8076 return return_in_memory_64 (type, mode);
8078 else
8079 return return_in_memory_32 (type, mode);
8080 #endif
8084 /* Create the va_list data type. */
8086 /* Returns the calling-convention-specific va_list data type.
8087 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8089 static tree
8090 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8092 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8094 /* For i386 we use plain pointer to argument area. */
8095 if (!TARGET_64BIT || abi == MS_ABI)
8096 return build_pointer_type (char_type_node);
8098 record = lang_hooks.types.make_type (RECORD_TYPE);
8099 type_decl = build_decl (BUILTINS_LOCATION,
8100 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8102 f_gpr = build_decl (BUILTINS_LOCATION,
8103 FIELD_DECL, get_identifier ("gp_offset"),
8104 unsigned_type_node);
8105 f_fpr = build_decl (BUILTINS_LOCATION,
8106 FIELD_DECL, get_identifier ("fp_offset"),
8107 unsigned_type_node);
8108 f_ovf = build_decl (BUILTINS_LOCATION,
8109 FIELD_DECL, get_identifier ("overflow_arg_area"),
8110 ptr_type_node);
8111 f_sav = build_decl (BUILTINS_LOCATION,
8112 FIELD_DECL, get_identifier ("reg_save_area"),
8113 ptr_type_node);
8115 va_list_gpr_counter_field = f_gpr;
8116 va_list_fpr_counter_field = f_fpr;
8118 DECL_FIELD_CONTEXT (f_gpr) = record;
8119 DECL_FIELD_CONTEXT (f_fpr) = record;
8120 DECL_FIELD_CONTEXT (f_ovf) = record;
8121 DECL_FIELD_CONTEXT (f_sav) = record;
8123 TYPE_STUB_DECL (record) = type_decl;
8124 TYPE_NAME (record) = type_decl;
8125 TYPE_FIELDS (record) = f_gpr;
8126 DECL_CHAIN (f_gpr) = f_fpr;
8127 DECL_CHAIN (f_fpr) = f_ovf;
8128 DECL_CHAIN (f_ovf) = f_sav;
8130 layout_type (record);
8132 /* The correct type is an array type of one element. */
8133 return build_array_type (record, build_index_type (size_zero_node));
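/* The record built above corresponds to the SysV AMD64 va_list type:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   gp_offset and fp_offset index into reg_save_area, while
   overflow_arg_area walks the stack-passed arguments.  */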
8136 /* Set up the builtin va_list data type and, for 64-bit, the additional
8137 calling-convention-specific va_list data types. */
8139 static tree
8140 ix86_build_builtin_va_list (void)
8142 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8144 /* Initialize abi specific va_list builtin types. */
8145 if (TARGET_64BIT)
8147 tree t;
8148 if (ix86_abi == MS_ABI)
8150 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8151 if (TREE_CODE (t) != RECORD_TYPE)
8152 t = build_variant_type_copy (t);
8153 sysv_va_list_type_node = t;
8155 else
8157 t = ret;
8158 if (TREE_CODE (t) != RECORD_TYPE)
8159 t = build_variant_type_copy (t);
8160 sysv_va_list_type_node = t;
8162 if (ix86_abi != MS_ABI)
8164 t = ix86_build_builtin_va_list_abi (MS_ABI);
8165 if (TREE_CODE (t) != RECORD_TYPE)
8166 t = build_variant_type_copy (t);
8167 ms_va_list_type_node = t;
8169 else
8171 t = ret;
8172 if (TREE_CODE (t) != RECORD_TYPE)
8173 t = build_variant_type_copy (t);
8174 ms_va_list_type_node = t;
8178 return ret;
8181 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
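/* A sketch of the save area laid out below, assuming both parts are
   needed: the six integer argument registers occupy bytes 0..47 and the
   eight SSE argument registers occupy the following 8 * 16 bytes; these
   are the positions that the gp_offset/fp_offset values stored by
   va_start refer to.  */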
8183 static void
8184 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8186 rtx save_area, mem;
8187 alias_set_type set;
8188 int i, max;
8190 /* GPR size of varargs save area. */
8191 if (cfun->va_list_gpr_size)
8192 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8193 else
8194 ix86_varargs_gpr_size = 0;
8196 /* FPR size of varargs save area. We don't need it if we don't pass
8197 anything in SSE registers. */
8198 if (TARGET_SSE && cfun->va_list_fpr_size)
8199 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8200 else
8201 ix86_varargs_fpr_size = 0;
8203 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8204 return;
8206 save_area = frame_pointer_rtx;
8207 set = get_varargs_alias_set ();
8209 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8210 if (max > X86_64_REGPARM_MAX)
8211 max = X86_64_REGPARM_MAX;
8213 for (i = cum->regno; i < max; i++)
8215 mem = gen_rtx_MEM (word_mode,
8216 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8217 MEM_NOTRAP_P (mem) = 1;
8218 set_mem_alias_set (mem, set);
8219 emit_move_insn (mem,
8220 gen_rtx_REG (word_mode,
8221 x86_64_int_parameter_registers[i]));
8224 if (ix86_varargs_fpr_size)
8226 enum machine_mode smode;
8227 rtx label, test;
8229 /* Now emit code to save SSE registers. The AX parameter contains number
8230 of SSE parameter registers used to call this function, though all we
8231 actually check here is the zero/non-zero status. */
8233 label = gen_label_rtx ();
8234 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8235 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8236 label));
8238 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8239 we used movdqa (i.e. TImode) instead? Perhaps even better would
8240 be if we could determine the real mode of the data, via a hook
8241 into pass_stdarg. Ignore all that for now. */
8242 smode = V4SFmode;
8243 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8244 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8246 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8247 if (max > X86_64_SSE_REGPARM_MAX)
8248 max = X86_64_SSE_REGPARM_MAX;
8250 for (i = cum->sse_regno; i < max; ++i)
8252 mem = plus_constant (Pmode, save_area,
8253 i * 16 + ix86_varargs_gpr_size);
8254 mem = gen_rtx_MEM (smode, mem);
8255 MEM_NOTRAP_P (mem) = 1;
8256 set_mem_alias_set (mem, set);
8257 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8259 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8262 emit_label (label);
8266 static void
8267 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8269 alias_set_type set = get_varargs_alias_set ();
8270 int i;
8272 /* Reset to zero, as there might be a sysv vaarg used
8273 before. */
8274 ix86_varargs_gpr_size = 0;
8275 ix86_varargs_fpr_size = 0;
8277 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8279 rtx reg, mem;
8281 mem = gen_rtx_MEM (Pmode,
8282 plus_constant (Pmode, virtual_incoming_args_rtx,
8283 i * UNITS_PER_WORD));
8284 MEM_NOTRAP_P (mem) = 1;
8285 set_mem_alias_set (mem, set);
8287 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8288 emit_move_insn (mem, reg);
8292 static void
8293 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8294 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8295 int no_rtl)
8297 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8298 CUMULATIVE_ARGS next_cum;
8299 tree fntype;
8301 /* This argument doesn't appear to be used anymore. Which is good,
8302 because the old code here didn't suppress rtl generation. */
8303 gcc_assert (!no_rtl);
8305 if (!TARGET_64BIT)
8306 return;
8308 fntype = TREE_TYPE (current_function_decl);
8310 /* For varargs, we do not want to skip the dummy va_dcl argument.
8311 For stdargs, we do want to skip the last named argument. */
8312 next_cum = *cum;
8313 if (stdarg_p (fntype))
8314 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8315 true);
8317 if (cum->call_abi == MS_ABI)
8318 setup_incoming_varargs_ms_64 (&next_cum);
8319 else
8320 setup_incoming_varargs_64 (&next_cum);
8323 /* Check whether TYPE is a va_list of the plain char * kind. */
8325 static bool
8326 is_va_list_char_pointer (tree type)
8328 tree canonic;
8330 /* For 32-bit it is always true. */
8331 if (!TARGET_64BIT)
8332 return true;
8333 canonic = ix86_canonical_va_list_type (type);
8334 return (canonic == ms_va_list_type_node
8335 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8338 /* Implement va_start. */
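/* The expansion below initializes the four va_list fields roughly as:
     gp_offset         = <named integer regs used> * 8
     fp_offset         = 8 * X86_64_REGPARM_MAX (48) + <named SSE regs used> * 16
     overflow_arg_area = address of the first stack-passed argument
     reg_save_area     = address of the register save area
   matching the layout set up in setup_incoming_varargs_64.  */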
8340 static void
8341 ix86_va_start (tree valist, rtx nextarg)
8343 HOST_WIDE_INT words, n_gpr, n_fpr;
8344 tree f_gpr, f_fpr, f_ovf, f_sav;
8345 tree gpr, fpr, ovf, sav, t;
8346 tree type;
8347 rtx ovf_rtx;
8349 if (flag_split_stack
8350 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8352 unsigned int scratch_regno;
8354 /* When we are splitting the stack, we can't refer to the stack
8355 arguments using internal_arg_pointer, because they may be on
8356 the old stack. The split stack prologue will arrange to
8357 leave a pointer to the old stack arguments in a scratch
8358 register, which we here copy to a pseudo-register. The split
8359 stack prologue can't set the pseudo-register directly because
8360 it (the prologue) runs before any registers have been saved. */
8362 scratch_regno = split_stack_prologue_scratch_regno ();
8363 if (scratch_regno != INVALID_REGNUM)
8365 rtx reg, seq;
8367 reg = gen_reg_rtx (Pmode);
8368 cfun->machine->split_stack_varargs_pointer = reg;
8370 start_sequence ();
8371 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8372 seq = get_insns ();
8373 end_sequence ();
8375 push_topmost_sequence ();
8376 emit_insn_after (seq, entry_of_function ());
8377 pop_topmost_sequence ();
8381 /* Only 64-bit targets need something special. */
8382 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8384 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8385 std_expand_builtin_va_start (valist, nextarg);
8386 else
8388 rtx va_r, next;
8390 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8391 next = expand_binop (ptr_mode, add_optab,
8392 cfun->machine->split_stack_varargs_pointer,
8393 crtl->args.arg_offset_rtx,
8394 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8395 convert_move (va_r, next, 0);
8397 return;
8400 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8401 f_fpr = DECL_CHAIN (f_gpr);
8402 f_ovf = DECL_CHAIN (f_fpr);
8403 f_sav = DECL_CHAIN (f_ovf);
8405 valist = build_simple_mem_ref (valist);
8406 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8407 /* The following should be folded into the MEM_REF offset. */
8408 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8409 f_gpr, NULL_TREE);
8410 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8411 f_fpr, NULL_TREE);
8412 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8413 f_ovf, NULL_TREE);
8414 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8415 f_sav, NULL_TREE);
8417 /* Count number of gp and fp argument registers used. */
8418 words = crtl->args.info.words;
8419 n_gpr = crtl->args.info.regno;
8420 n_fpr = crtl->args.info.sse_regno;
8422 if (cfun->va_list_gpr_size)
8424 type = TREE_TYPE (gpr);
8425 t = build2 (MODIFY_EXPR, type,
8426 gpr, build_int_cst (type, n_gpr * 8));
8427 TREE_SIDE_EFFECTS (t) = 1;
8428 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8431 if (TARGET_SSE && cfun->va_list_fpr_size)
8433 type = TREE_TYPE (fpr);
8434 t = build2 (MODIFY_EXPR, type, fpr,
8435 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8436 TREE_SIDE_EFFECTS (t) = 1;
8437 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8440 /* Find the overflow area. */
8441 type = TREE_TYPE (ovf);
8442 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8443 ovf_rtx = crtl->args.internal_arg_pointer;
8444 else
8445 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8446 t = make_tree (type, ovf_rtx);
8447 if (words != 0)
8448 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8449 t = build2 (MODIFY_EXPR, type, ovf, t);
8450 TREE_SIDE_EFFECTS (t) = 1;
8451 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8453 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8455 /* Find the register save area.
8456 The function prologue saves it right above the stack frame. */
8457 type = TREE_TYPE (sav);
8458 t = make_tree (type, frame_pointer_rtx);
8459 if (!ix86_varargs_gpr_size)
8460 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8461 t = build2 (MODIFY_EXPR, type, sav, t);
8462 TREE_SIDE_EFFECTS (t) = 1;
8463 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8467 /* Implement va_arg. */
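/* The gimplified sequence built below roughly follows the psABI
   algorithm: if the argument still fits in registers (checked via the
   gp_offset/fp_offset counters), fetch it from reg_save_area and bump
   the counters; otherwise fetch it from overflow_arg_area and advance
   that pointer.  Aggregates whose pieces are not laid out contiguously
   in the save area are first copied into a temporary.  */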
8469 static tree
8470 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8471 gimple_seq *post_p)
8473 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8474 tree f_gpr, f_fpr, f_ovf, f_sav;
8475 tree gpr, fpr, ovf, sav, t;
8476 int size, rsize;
8477 tree lab_false, lab_over = NULL_TREE;
8478 tree addr, t2;
8479 rtx container;
8480 int indirect_p = 0;
8481 tree ptrtype;
8482 enum machine_mode nat_mode;
8483 unsigned int arg_boundary;
8485 /* Only 64-bit targets need something special. */
8486 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8487 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8489 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8490 f_fpr = DECL_CHAIN (f_gpr);
8491 f_ovf = DECL_CHAIN (f_fpr);
8492 f_sav = DECL_CHAIN (f_ovf);
8494 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8495 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8496 valist = build_va_arg_indirect_ref (valist);
8497 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8498 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8499 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8501 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8502 if (indirect_p)
8503 type = build_pointer_type (type);
8504 size = int_size_in_bytes (type);
8505 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8507 nat_mode = type_natural_mode (type, NULL, false);
8508 switch (nat_mode)
8510 case V8SFmode:
8511 case V8SImode:
8512 case V32QImode:
8513 case V16HImode:
8514 case V4DFmode:
8515 case V4DImode:
8516 case V16SFmode:
8517 case V16SImode:
8518 case V64QImode:
8519 case V32HImode:
8520 case V8DFmode:
8521 case V8DImode:
8522 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
8523 if (!TARGET_64BIT_MS_ABI)
8525 container = NULL;
8526 break;
8529 default:
8530 container = construct_container (nat_mode, TYPE_MODE (type),
8531 type, 0, X86_64_REGPARM_MAX,
8532 X86_64_SSE_REGPARM_MAX, intreg,
8533 0);
8534 break;
8537 /* Pull the value out of the saved registers. */
8539 addr = create_tmp_var (ptr_type_node, "addr");
8541 if (container)
8543 int needed_intregs, needed_sseregs;
8544 bool need_temp;
8545 tree int_addr, sse_addr;
8547 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8548 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8550 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8552 need_temp = (!REG_P (container)
8553 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8554 || TYPE_ALIGN (type) > 128));
8556 /* In case we are passing a structure, verify that it is a consecutive block
8557 in the register save area. If not, we need to do moves. */
8558 if (!need_temp && !REG_P (container))
8560 /* Verify that all registers are strictly consecutive */
8561 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8563 int i;
8565 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8567 rtx slot = XVECEXP (container, 0, i);
8568 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8569 || INTVAL (XEXP (slot, 1)) != i * 16)
8570 need_temp = 1;
8573 else
8575 int i;
8577 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8579 rtx slot = XVECEXP (container, 0, i);
8580 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8581 || INTVAL (XEXP (slot, 1)) != i * 8)
8582 need_temp = 1;
8586 if (!need_temp)
8588 int_addr = addr;
8589 sse_addr = addr;
8591 else
8593 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8594 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8597 /* First ensure that we fit completely in registers. */
8598 if (needed_intregs)
8600 t = build_int_cst (TREE_TYPE (gpr),
8601 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8602 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8603 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8604 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8605 gimplify_and_add (t, pre_p);
8607 if (needed_sseregs)
8609 t = build_int_cst (TREE_TYPE (fpr),
8610 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8611 + X86_64_REGPARM_MAX * 8);
8612 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8613 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8614 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8615 gimplify_and_add (t, pre_p);
8618 /* Compute index to start of area used for integer regs. */
8619 if (needed_intregs)
8621 /* int_addr = gpr + sav; */
8622 t = fold_build_pointer_plus (sav, gpr);
8623 gimplify_assign (int_addr, t, pre_p);
8625 if (needed_sseregs)
8627 /* sse_addr = fpr + sav; */
8628 t = fold_build_pointer_plus (sav, fpr);
8629 gimplify_assign (sse_addr, t, pre_p);
8631 if (need_temp)
8633 int i, prev_size = 0;
8634 tree temp = create_tmp_var (type, "va_arg_tmp");
8636 /* addr = &temp; */
8637 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8638 gimplify_assign (addr, t, pre_p);
8640 for (i = 0; i < XVECLEN (container, 0); i++)
8642 rtx slot = XVECEXP (container, 0, i);
8643 rtx reg = XEXP (slot, 0);
8644 enum machine_mode mode = GET_MODE (reg);
8645 tree piece_type;
8646 tree addr_type;
8647 tree daddr_type;
8648 tree src_addr, src;
8649 int src_offset;
8650 tree dest_addr, dest;
8651 int cur_size = GET_MODE_SIZE (mode);
8653 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8654 prev_size = INTVAL (XEXP (slot, 1));
8655 if (prev_size + cur_size > size)
8657 cur_size = size - prev_size;
8658 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8659 if (mode == BLKmode)
8660 mode = QImode;
8662 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8663 if (mode == GET_MODE (reg))
8664 addr_type = build_pointer_type (piece_type);
8665 else
8666 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8667 true);
8668 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8669 true);
8671 if (SSE_REGNO_P (REGNO (reg)))
8673 src_addr = sse_addr;
8674 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8676 else
8678 src_addr = int_addr;
8679 src_offset = REGNO (reg) * 8;
8681 src_addr = fold_convert (addr_type, src_addr);
8682 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8684 dest_addr = fold_convert (daddr_type, addr);
8685 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8686 if (cur_size == GET_MODE_SIZE (mode))
8688 src = build_va_arg_indirect_ref (src_addr);
8689 dest = build_va_arg_indirect_ref (dest_addr);
8691 gimplify_assign (dest, src, pre_p);
8693 else
8695 tree copy
8696 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8697 3, dest_addr, src_addr,
8698 size_int (cur_size));
8699 gimplify_and_add (copy, pre_p);
8701 prev_size += cur_size;
8705 if (needed_intregs)
8707 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8708 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8709 gimplify_assign (gpr, t, pre_p);
8712 if (needed_sseregs)
8714 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8715 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8716 gimplify_assign (fpr, t, pre_p);
8719 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8721 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8724 /* ... otherwise out of the overflow area. */
8726 /* When we align a parameter on the stack for the caller, if the parameter
8727 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8728 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Here we match the callee
8729 with the caller.
8730 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8731 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8732 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8734 /* Care for on-stack alignment if needed. */
8735 if (arg_boundary <= 64 || size == 0)
8736 t = ovf;
8737 else
8739 HOST_WIDE_INT align = arg_boundary / 8;
8740 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8741 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8742 build_int_cst (TREE_TYPE (t), -align));
8745 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8746 gimplify_assign (addr, t, pre_p);
8748 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8749 gimplify_assign (unshare_expr (ovf), t, pre_p);
8751 if (container)
8752 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8754 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8755 addr = fold_convert (ptrtype, addr);
8757 if (indirect_p)
8758 addr = build_va_arg_indirect_ref (addr);
8759 return build_va_arg_indirect_ref (addr);
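/* Illustrative sketch (not part of the i386 back end).  The va_arg lowering
   above rounds the overflow-area pointer up to the argument boundary with the
   usual "(p + align - 1) & -align" trick and computes RSIZE as a round-up
   division by the word size.  A minimal host-side model of those two
   computations, assuming an 8-byte word purely for the sake of the example:  */

static unsigned long
example_round_up_to_alignment (unsigned long ovf, unsigned long align)
{
  /* ALIGN must be a power of two; -ALIGN is then a mask of the high bits.  */
  return (ovf + align - 1) & -align;
}

static int
example_size_in_words (int size_in_bytes)
{
  const int units_per_word = 8;	/* assumed word size for illustration */
  return (size_in_bytes + units_per_word - 1) / units_per_word;
}

/* For instance, example_round_up_to_alignment (0x1004, 16) yields 0x1010 and
   example_size_in_words (20) yields 3, matching what the gimple built above
   computes at run time.  */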
8762 /* Return true if OPNUM's MEM should be matched
8763 in movabs* patterns. */
8765 bool
8766 ix86_check_movabs (rtx insn, int opnum)
8768 rtx set, mem;
8770 set = PATTERN (insn);
8771 if (GET_CODE (set) == PARALLEL)
8772 set = XVECEXP (set, 0, 0);
8773 gcc_assert (GET_CODE (set) == SET);
8774 mem = XEXP (set, opnum);
8775 while (GET_CODE (mem) == SUBREG)
8776 mem = SUBREG_REG (mem);
8777 gcc_assert (MEM_P (mem));
8778 return volatile_ok || !MEM_VOLATILE_P (mem);
8781 /* Initialize the table of extra 80387 mathematical constants. */
8783 static void
8784 init_ext_80387_constants (void)
8786 static const char * cst[5] =
8788 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8789 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8790 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8791 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8792 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8794 int i;
8796 for (i = 0; i < 5; i++)
8798 real_from_string (&ext_80387_constants_table[i], cst[i]);
8799 /* Ensure each constant is rounded to XFmode precision. */
8800 real_convert (&ext_80387_constants_table[i],
8801 XFmode, &ext_80387_constants_table[i]);
8804 ext_80387_constants_init = 1;
8807 /* Return non-zero if the constant is something that
8808 can be loaded with a special instruction. */
8811 standard_80387_constant_p (rtx x)
8813 enum machine_mode mode = GET_MODE (x);
8815 REAL_VALUE_TYPE r;
8817 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8818 return -1;
8820 if (x == CONST0_RTX (mode))
8821 return 1;
8822 if (x == CONST1_RTX (mode))
8823 return 2;
8825 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8827 /* For XFmode constants, try to find a special 80387 instruction when
8828 optimizing for size or on those CPUs that benefit from them. */
8829 if (mode == XFmode
8830 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8832 int i;
8834 if (! ext_80387_constants_init)
8835 init_ext_80387_constants ();
8837 for (i = 0; i < 5; i++)
8838 if (real_identical (&r, &ext_80387_constants_table[i]))
8839 return i + 3;
8842 /* Load of the constant -0.0 or -1.0 will be split as
8843 fldz;fchs or fld1;fchs sequence. */
8844 if (real_isnegzero (&r))
8845 return 8;
8846 if (real_identical (&r, &dconstm1))
8847 return 9;
8849 return 0;
8852 /* Return the opcode of the special instruction to be used to load
8853 the constant X. */
8855 const char *
8856 standard_80387_constant_opcode (rtx x)
8858 switch (standard_80387_constant_p (x))
8860 case 1:
8861 return "fldz";
8862 case 2:
8863 return "fld1";
8864 case 3:
8865 return "fldlg2";
8866 case 4:
8867 return "fldln2";
8868 case 5:
8869 return "fldl2e";
8870 case 6:
8871 return "fldl2t";
8872 case 7:
8873 return "fldpi";
8874 case 8:
8875 case 9:
8876 return "#";
8877 default:
8878 gcc_unreachable ();
8882 /* Return the CONST_DOUBLE representing the 80387 constant that is
8883 loaded by the specified special instruction. The argument IDX
8884 matches the return value from standard_80387_constant_p. */
8887 standard_80387_constant_rtx (int idx)
8889 int i;
8891 if (! ext_80387_constants_init)
8892 init_ext_80387_constants ();
8894 switch (idx)
8896 case 3:
8897 case 4:
8898 case 5:
8899 case 6:
8900 case 7:
8901 i = idx - 3;
8902 break;
8904 default:
8905 gcc_unreachable ();
8908 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8909 XFmode);
8912 /* Return 1 if X is all 0s and 2 if X is all 1s
8913 in a supported SSE/AVX vector mode. */
8916 standard_sse_constant_p (rtx x)
8918 enum machine_mode mode = GET_MODE (x);
8920 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8921 return 1;
8922 if (vector_all_ones_operand (x, mode))
8923 switch (mode)
8925 case V16QImode:
8926 case V8HImode:
8927 case V4SImode:
8928 case V2DImode:
8929 if (TARGET_SSE2)
8930 return 2;
8931 case V32QImode:
8932 case V16HImode:
8933 case V8SImode:
8934 case V4DImode:
8935 if (TARGET_AVX2)
8936 return 2;
8937 case V64QImode:
8938 case V32HImode:
8939 case V16SImode:
8940 case V8DImode:
8941 if (TARGET_AVX512F)
8942 return 2;
8943 default:
8944 break;
8947 return 0;
8950 /* Return the opcode of the special instruction to be used to load
8951 the constant X. */
8953 const char *
8954 standard_sse_constant_opcode (rtx insn, rtx x)
8956 switch (standard_sse_constant_p (x))
8958 case 1:
8959 switch (get_attr_mode (insn))
8961 case MODE_XI:
8962 case MODE_V16SF:
8963 return "vpxord\t%g0, %g0, %g0";
8964 case MODE_V8DF:
8965 return "vpxorq\t%g0, %g0, %g0";
8966 case MODE_TI:
8967 return "%vpxor\t%0, %d0";
8968 case MODE_V2DF:
8969 return "%vxorpd\t%0, %d0";
8970 case MODE_V4SF:
8971 return "%vxorps\t%0, %d0";
8973 case MODE_OI:
8974 return "vpxor\t%x0, %x0, %x0";
8975 case MODE_V4DF:
8976 return "vxorpd\t%x0, %x0, %x0";
8977 case MODE_V8SF:
8978 return "vxorps\t%x0, %x0, %x0";
8980 default:
8981 break;
8984 case 2:
8985 if (get_attr_mode (insn) == MODE_XI
8986 || get_attr_mode (insn) == MODE_V8DF
8987 || get_attr_mode (insn) == MODE_V16SF)
8988 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
8989 if (TARGET_AVX)
8990 return "vpcmpeqd\t%0, %0, %0";
8991 else
8992 return "pcmpeqd\t%0, %0";
8994 default:
8995 break;
8997 gcc_unreachable ();
9000 /* Return true if OP contains a symbol reference. */
9002 bool
9003 symbolic_reference_mentioned_p (rtx op)
9005 const char *fmt;
9006 int i;
9008 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9009 return true;
9011 fmt = GET_RTX_FORMAT (GET_CODE (op));
9012 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9014 if (fmt[i] == 'E')
9016 int j;
9018 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9019 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9020 return true;
9023 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9024 return true;
9027 return false;
9030 /* Return true if it is appropriate to emit `ret' instructions in the
9031 body of a function. Do this only if the epilogue is simple, needing a
9032 couple of insns. Prior to reloading, we can't tell how many registers
9033 must be saved, so return false then. Return false if there is no frame
9034 marker to de-allocate. */
9036 bool
9037 ix86_can_use_return_insn_p (void)
9039 struct ix86_frame frame;
9041 if (! reload_completed || frame_pointer_needed)
9042 return 0;
9044 /* Don't allow more than 32k pop, since that's all we can do
9045 with one instruction. */
9046 if (crtl->args.pops_args && crtl->args.size >= 32768)
9047 return 0;
9049 ix86_compute_frame_layout (&frame);
9050 return (frame.stack_pointer_offset == UNITS_PER_WORD
9051 && (frame.nregs + frame.nsseregs) == 0);
9054 /* Value should be nonzero if functions must have frame pointers.
9055 Zero means the frame pointer need not be set up (and parms may
9056 be accessed via the stack pointer) in functions that seem suitable. */
9058 static bool
9059 ix86_frame_pointer_required (void)
9061 /* If we accessed previous frames, then the generated code expects
9062 to be able to access the saved ebp value in our frame. */
9063 if (cfun->machine->accesses_prev_frame)
9064 return true;
9066 /* Several x86 OSes need a frame pointer for other reasons,
9067 usually pertaining to setjmp. */
9068 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9069 return true;
9071 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9072 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9073 return true;
9075 /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9076 stack allocation is 4GB. */
9077 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9078 return true;
9080 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9081 turns off the frame pointer by default. Turn it back on now if
9082 we've not got a leaf function. */
9083 if (TARGET_OMIT_LEAF_FRAME_POINTER
9084 && (!crtl->is_leaf
9085 || ix86_current_function_calls_tls_descriptor))
9086 return true;
9088 if (crtl->profile && !flag_fentry)
9089 return true;
9091 return false;
9094 /* Record that the current function accesses previous call frames. */
9096 void
9097 ix86_setup_frame_addresses (void)
9099 cfun->machine->accesses_prev_frame = 1;
9102 #ifndef USE_HIDDEN_LINKONCE
9103 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9104 # define USE_HIDDEN_LINKONCE 1
9105 # else
9106 # define USE_HIDDEN_LINKONCE 0
9107 # endif
9108 #endif
9110 static int pic_labels_used;
9112 /* Fills in the label name that should be used for a pc thunk for
9113 the given register. */
9115 static void
9116 get_pc_thunk_name (char name[32], unsigned int regno)
9118 gcc_assert (!TARGET_64BIT);
9120 if (USE_HIDDEN_LINKONCE)
9121 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9122 else
9123 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
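/* Illustrative sketch (not part of the i386 back end).  With
   USE_HIDDEN_LINKONCE the thunk emitted below for, say, the %ebx PIC register
   is named "__x86.get_pc_thunk.bx"; the suffix comes straight from reg_names.
   A self-contained model of the name construction (the literal "bx" stands in
   for reg_names[regno] and is an assumption of this sketch):  */

#include <stdio.h>

static void
example_pc_thunk_name (void)
{
  char name[32];
  const char *reg = "bx";	/* reg_names[BX_REG] on ia32 */

  sprintf (name, "__x86.get_pc_thunk.%s", reg);
  printf ("%s\n", name);	/* prints __x86.get_pc_thunk.bx */
}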
9127 /* This function generates code for -fpic that loads %ebx with
9128 the return address of the caller and then returns. */
9130 static void
9131 ix86_code_end (void)
9133 rtx xops[2];
9134 int regno;
9136 for (regno = AX_REG; regno <= SP_REG; regno++)
9138 char name[32];
9139 tree decl;
9141 if (!(pic_labels_used & (1 << regno)))
9142 continue;
9144 get_pc_thunk_name (name, regno);
9146 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9147 get_identifier (name),
9148 build_function_type_list (void_type_node, NULL_TREE));
9149 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9150 NULL_TREE, void_type_node);
9151 TREE_PUBLIC (decl) = 1;
9152 TREE_STATIC (decl) = 1;
9153 DECL_IGNORED_P (decl) = 1;
9155 #if TARGET_MACHO
9156 if (TARGET_MACHO)
9158 switch_to_section (darwin_sections[text_coal_section]);
9159 fputs ("\t.weak_definition\t", asm_out_file);
9160 assemble_name (asm_out_file, name);
9161 fputs ("\n\t.private_extern\t", asm_out_file);
9162 assemble_name (asm_out_file, name);
9163 putc ('\n', asm_out_file);
9164 ASM_OUTPUT_LABEL (asm_out_file, name);
9165 DECL_WEAK (decl) = 1;
9167 else
9168 #endif
9169 if (USE_HIDDEN_LINKONCE)
9171 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
9173 targetm.asm_out.unique_section (decl, 0);
9174 switch_to_section (get_named_section (decl, NULL, 0));
9176 targetm.asm_out.globalize_label (asm_out_file, name);
9177 fputs ("\t.hidden\t", asm_out_file);
9178 assemble_name (asm_out_file, name);
9179 putc ('\n', asm_out_file);
9180 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9182 else
9184 switch_to_section (text_section);
9185 ASM_OUTPUT_LABEL (asm_out_file, name);
9188 DECL_INITIAL (decl) = make_node (BLOCK);
9189 current_function_decl = decl;
9190 init_function_start (decl);
9191 first_function_block_is_cold = false;
9192 /* Make sure unwind info is emitted for the thunk if needed. */
9193 final_start_function (emit_barrier (), asm_out_file, 1);
9195 /* Pad stack IP move with 4 instructions (two NOPs count
9196 as one instruction). */
9197 if (TARGET_PAD_SHORT_FUNCTION)
9199 int i = 8;
9201 while (i--)
9202 fputs ("\tnop\n", asm_out_file);
9205 xops[0] = gen_rtx_REG (Pmode, regno);
9206 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9207 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9208 fputs ("\tret\n", asm_out_file);
9209 final_end_function ();
9210 init_insn_lengths ();
9211 free_after_compilation (cfun);
9212 set_cfun (NULL);
9213 current_function_decl = NULL;
9216 if (flag_split_stack)
9217 file_end_indicate_split_stack ();
9220 /* Emit code for the SET_GOT patterns. */
9222 const char *
9223 output_set_got (rtx dest, rtx label)
9225 rtx xops[3];
9227 xops[0] = dest;
9229 if (TARGET_VXWORKS_RTP && flag_pic)
9231 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9232 xops[2] = gen_rtx_MEM (Pmode,
9233 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9234 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9236 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9237 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9238 an unadorned address. */
9239 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9240 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9241 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9242 return "";
9245 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9247 if (!flag_pic)
9249 if (TARGET_MACHO)
9250 /* We don't need a pic base, we're not producing pic. */
9251 gcc_unreachable ();
9253 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9254 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9255 targetm.asm_out.internal_label (asm_out_file, "L",
9256 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9258 else
9260 char name[32];
9261 get_pc_thunk_name (name, REGNO (dest));
9262 pic_labels_used |= 1 << REGNO (dest);
9264 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9265 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9266 output_asm_insn ("call\t%X2", xops);
9268 #if TARGET_MACHO
9269 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9270 This is what will be referenced by the Mach-O PIC subsystem. */
9271 if (machopic_should_output_picbase_label () || !label)
9272 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9274 /* When we are restoring the pic base at the site of a nonlocal label,
9275 and we decided to emit the pic base above, we will still output a
9276 local label used for calculating the correction offset (even though
9277 the offset will be 0 in that case). */
9278 if (label)
9279 targetm.asm_out.internal_label (asm_out_file, "L",
9280 CODE_LABEL_NUMBER (label));
9281 #endif
9284 if (!TARGET_MACHO)
9285 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9287 return "";
9290 /* Generate a "push" pattern for input ARG. */
9292 static rtx
9293 gen_push (rtx arg)
9295 struct machine_function *m = cfun->machine;
9297 if (m->fs.cfa_reg == stack_pointer_rtx)
9298 m->fs.cfa_offset += UNITS_PER_WORD;
9299 m->fs.sp_offset += UNITS_PER_WORD;
9301 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9302 arg = gen_rtx_REG (word_mode, REGNO (arg));
9304 return gen_rtx_SET (VOIDmode,
9305 gen_rtx_MEM (word_mode,
9306 gen_rtx_PRE_DEC (Pmode,
9307 stack_pointer_rtx)),
9308 arg);
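/* Descriptive note (not part of the i386 back end): on a 64-bit target the
   SET built above takes roughly the form
   (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI <arg>)), i.e. an ordinary
   push, and gen_pop below builds the mirror-image POST_INC load.  */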
9311 /* Generate a "pop" pattern for input ARG. */
9313 static rtx
9314 gen_pop (rtx arg)
9316 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9317 arg = gen_rtx_REG (word_mode, REGNO (arg));
9319 return gen_rtx_SET (VOIDmode,
9320 arg,
9321 gen_rtx_MEM (word_mode,
9322 gen_rtx_POST_INC (Pmode,
9323 stack_pointer_rtx)));
9326 /* Return >= 0 if there is an unused call-clobbered register available
9327 for the entire function. */
9329 static unsigned int
9330 ix86_select_alt_pic_regnum (void)
9332 if (crtl->is_leaf
9333 && !crtl->profile
9334 && !ix86_current_function_calls_tls_descriptor)
9336 int i, drap;
9337 /* Can't use the same register for both PIC and DRAP. */
9338 if (crtl->drap_reg)
9339 drap = REGNO (crtl->drap_reg);
9340 else
9341 drap = -1;
9342 for (i = 2; i >= 0; --i)
9343 if (i != drap && !df_regs_ever_live_p (i))
9344 return i;
9347 return INVALID_REGNUM;
9350 /* Return TRUE if we need to save REGNO. */
9352 static bool
9353 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9355 if (pic_offset_table_rtx
9356 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9357 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9358 || crtl->profile
9359 || crtl->calls_eh_return
9360 || crtl->uses_const_pool
9361 || cfun->has_nonlocal_label))
9362 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9364 if (crtl->calls_eh_return && maybe_eh_return)
9366 unsigned i;
9367 for (i = 0; ; i++)
9369 unsigned test = EH_RETURN_DATA_REGNO (i);
9370 if (test == INVALID_REGNUM)
9371 break;
9372 if (test == regno)
9373 return true;
9377 if (crtl->drap_reg
9378 && regno == REGNO (crtl->drap_reg)
9379 && !cfun->machine->no_drap_save_restore)
9380 return true;
9382 return (df_regs_ever_live_p (regno)
9383 && !call_used_regs[regno]
9384 && !fixed_regs[regno]
9385 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9388 /* Return the number of saved general purpose registers. */
9390 static int
9391 ix86_nsaved_regs (void)
9393 int nregs = 0;
9394 int regno;
9396 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9397 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9398 nregs ++;
9399 return nregs;
9402 /* Return the number of saved SSE registers. */
9404 static int
9405 ix86_nsaved_sseregs (void)
9407 int nregs = 0;
9408 int regno;
9410 if (!TARGET_64BIT_MS_ABI)
9411 return 0;
9412 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9413 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9414 nregs ++;
9415 return nregs;
9418 /* Given FROM and TO register numbers, say whether this elimination is
9419 allowed. If stack alignment is needed, we can only replace argument
9420 pointer with hard frame pointer, or replace frame pointer with stack
9421 pointer. Otherwise, frame pointer elimination is automatically
9422 handled and all other eliminations are valid. */
9424 static bool
9425 ix86_can_eliminate (const int from, const int to)
9427 if (stack_realign_fp)
9428 return ((from == ARG_POINTER_REGNUM
9429 && to == HARD_FRAME_POINTER_REGNUM)
9430 || (from == FRAME_POINTER_REGNUM
9431 && to == STACK_POINTER_REGNUM));
9432 else
9433 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9436 /* Return the offset between two registers, one to be eliminated, and the other
9437 its replacement, at the start of a routine. */
9439 HOST_WIDE_INT
9440 ix86_initial_elimination_offset (int from, int to)
9442 struct ix86_frame frame;
9443 ix86_compute_frame_layout (&frame);
9445 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9446 return frame.hard_frame_pointer_offset;
9447 else if (from == FRAME_POINTER_REGNUM
9448 && to == HARD_FRAME_POINTER_REGNUM)
9449 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9450 else
9452 gcc_assert (to == STACK_POINTER_REGNUM);
9454 if (from == ARG_POINTER_REGNUM)
9455 return frame.stack_pointer_offset;
9457 gcc_assert (from == FRAME_POINTER_REGNUM);
9458 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9462 /* In a dynamically-aligned function, we can't know the offset from
9463 stack pointer to frame pointer, so we must ensure that setjmp
9464 eliminates fp against the hard fp (%ebp) rather than trying to
9465 index from %esp up to the top of the frame across a gap that is
9466 of unknown (at compile-time) size. */
9467 static rtx
9468 ix86_builtin_setjmp_frame_value (void)
9470 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9473 /* When using -fsplit-stack, the allocation routines set a field in
9474 the TCB to the bottom of the stack plus this much space, measured
9475 in bytes. */
9477 #define SPLIT_STACK_AVAILABLE 256
9479 /* Fill structure ix86_frame about frame of currently computed function. */
9481 static void
9482 ix86_compute_frame_layout (struct ix86_frame *frame)
9484 unsigned HOST_WIDE_INT stack_alignment_needed;
9485 HOST_WIDE_INT offset;
9486 unsigned HOST_WIDE_INT preferred_alignment;
9487 HOST_WIDE_INT size = get_frame_size ();
9488 HOST_WIDE_INT to_allocate;
9490 frame->nregs = ix86_nsaved_regs ();
9491 frame->nsseregs = ix86_nsaved_sseregs ();
9493 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9494 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9496 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9497 for function prologues and leaf functions. */
9498 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9499 && (!crtl->is_leaf || cfun->calls_alloca != 0
9500 || ix86_current_function_calls_tls_descriptor))
9502 preferred_alignment = 16;
9503 stack_alignment_needed = 16;
9504 crtl->preferred_stack_boundary = 128;
9505 crtl->stack_alignment_needed = 128;
9508 gcc_assert (!size || stack_alignment_needed);
9509 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9510 gcc_assert (preferred_alignment <= stack_alignment_needed);
9512 /* For SEH we have to limit the amount of code movement into the prologue.
9513 At present we do this via a BLOCKAGE, at which point there's very little
9514 scheduling that can be done, which means that there's very little point
9515 in doing anything except PUSHs. */
9516 if (TARGET_SEH)
9517 cfun->machine->use_fast_prologue_epilogue = false;
9519 /* During the reload iteration the number of registers saved can change.
9520 Recompute the value as needed. Do not recompute when the number of registers
9521 didn't change, as reload does multiple calls to the function and does not
9522 expect the decision to change within a single iteration. */
9523 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9524 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9526 int count = frame->nregs;
9527 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9529 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9531 /* The fast prologue uses moves instead of pushes to save registers. This
9532 is significantly longer, but also executes faster, as modern hardware
9533 can execute the moves in parallel but can't do that for push/pop.
9535 Be careful about choosing which prologue to emit: when the function takes
9536 many instructions to execute, we may as well use the slow version, and
9537 likewise when the function is known to be outside a hot spot (this is
9538 known with feedback only). Weight the size of the function by the number
9539 of registers to save, as it is cheap to use one or two push instructions
9540 but very slow to use many of them.
9541 if (count)
9542 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9543 if (node->frequency < NODE_FREQUENCY_NORMAL
9544 || (flag_branch_probabilities
9545 && node->frequency < NODE_FREQUENCY_HOT))
9546 cfun->machine->use_fast_prologue_epilogue = false;
9547 else
9548 cfun->machine->use_fast_prologue_epilogue
9549 = !expensive_function_p (count);
9552 frame->save_regs_using_mov
9553 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9554 /* If static stack checking is enabled and done with probes,
9555 the registers need to be saved before allocating the frame. */
9556 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9558 /* Skip return address. */
9559 offset = UNITS_PER_WORD;
9561 /* Skip pushed static chain. */
9562 if (ix86_static_chain_on_stack)
9563 offset += UNITS_PER_WORD;
9565 /* Skip saved base pointer. */
9566 if (frame_pointer_needed)
9567 offset += UNITS_PER_WORD;
9568 frame->hfp_save_offset = offset;
9570 /* The traditional frame pointer location is at the top of the frame. */
9571 frame->hard_frame_pointer_offset = offset;
9573 /* Register save area */
9574 offset += frame->nregs * UNITS_PER_WORD;
9575 frame->reg_save_offset = offset;
9577 /* On SEH target, registers are pushed just before the frame pointer
9578 location. */
9579 if (TARGET_SEH)
9580 frame->hard_frame_pointer_offset = offset;
9582 /* Align and set SSE register save area. */
9583 if (frame->nsseregs)
9585 /* The only ABI that has saved SSE registers (Win64) also has a
9586 16-byte aligned default stack, and thus we don't need to be
9587 within the re-aligned local stack frame to save them. */
9588 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9589 offset = (offset + 16 - 1) & -16;
9590 offset += frame->nsseregs * 16;
9592 frame->sse_reg_save_offset = offset;
9594 /* The re-aligned stack starts here. Values before this point are not
9595 directly comparable with values below this point. In order to make
9596 sure that no value happens to be the same before and after, force
9597 the alignment computation below to add a non-zero value. */
9598 if (stack_realign_fp)
9599 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9601 /* Va-arg area */
9602 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9603 offset += frame->va_arg_size;
9605 /* Align start of frame for local function. */
9606 if (stack_realign_fp
9607 || offset != frame->sse_reg_save_offset
9608 || size != 0
9609 || !crtl->is_leaf
9610 || cfun->calls_alloca
9611 || ix86_current_function_calls_tls_descriptor)
9612 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9614 /* Frame pointer points here. */
9615 frame->frame_pointer_offset = offset;
9617 offset += size;
9619 /* Add outgoing arguments area. Can be skipped if we eliminated
9620 all the function calls as dead code.
9621 Skipping is, however, impossible when the function calls alloca. The alloca
9622 expander assumes that the last crtl->outgoing_args_size bytes
9623 of the stack frame are unused.
9624 if (ACCUMULATE_OUTGOING_ARGS
9625 && (!crtl->is_leaf || cfun->calls_alloca
9626 || ix86_current_function_calls_tls_descriptor))
9628 offset += crtl->outgoing_args_size;
9629 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9631 else
9632 frame->outgoing_arguments_size = 0;
9634 /* Align stack boundary. Only needed if we're calling another function
9635 or using alloca. */
9636 if (!crtl->is_leaf || cfun->calls_alloca
9637 || ix86_current_function_calls_tls_descriptor)
9638 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9640 /* We've reached end of stack frame. */
9641 frame->stack_pointer_offset = offset;
9643 /* Size prologue needs to allocate. */
9644 to_allocate = offset - frame->sse_reg_save_offset;
9646 if ((!to_allocate && frame->nregs <= 1)
9647 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9648 frame->save_regs_using_mov = false;
9650 if (ix86_using_red_zone ()
9651 && crtl->sp_is_unchanging
9652 && crtl->is_leaf
9653 && !ix86_current_function_calls_tls_descriptor)
9655 frame->red_zone_size = to_allocate;
9656 if (frame->save_regs_using_mov)
9657 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9658 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9659 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9661 else
9662 frame->red_zone_size = 0;
9663 frame->stack_pointer_offset -= frame->red_zone_size;
9665 /* The SEH frame pointer location is near the bottom of the frame.
9666 This is enforced by the fact that the difference between the
9667 stack pointer and the frame pointer is limited to 240 bytes in
9668 the unwind data structure. */
9669 if (TARGET_SEH)
9671 HOST_WIDE_INT diff;
9673 /* If we can leave the frame pointer where it is, do so. Also, returns
9674 the establisher frame for __builtin_frame_address (0). */
9675 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9676 if (diff <= SEH_MAX_FRAME_SIZE
9677 && (diff > 240 || (diff & 15) != 0)
9678 && !crtl->accesses_prior_frames)
9680 /* Ideally we'd determine what portion of the local stack frame
9681 (within the constraint of the lowest 240) is most heavily used.
9682 But without that complication, simply bias the frame pointer
9683 by 128 bytes so as to maximize the amount of the local stack
9684 frame that is addressable with 8-bit offsets. */
9685 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9690 /* This is semi-inlined memory_address_length, but simplified
9691 since we know that we're always dealing with reg+offset, and
9692 to avoid having to create and discard all that rtl. */
9694 static inline int
9695 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9697 int len = 4;
9699 if (offset == 0)
9701 /* EBP and R13 cannot be encoded without an offset. */
9702 len = (regno == BP_REG || regno == R13_REG);
9704 else if (IN_RANGE (offset, -128, 127))
9705 len = 1;
9707 /* ESP and R12 must be encoded with a SIB byte. */
9708 if (regno == SP_REG || regno == R12_REG)
9709 len++;
9711 return len;
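/* Illustrative sketch (not part of the i386 back end).  The length
   computation above models x86 addressing-mode sizes: a zero offset needs no
   displacement byte except for EBP/R13, an offset in [-128, 127] fits in one
   byte, anything else needs four, and ESP/R12 always cost an extra SIB byte.
   A self-contained version using plain flags instead of register numbers (the
   flag parameters stand in for the regno checks and are assumptions of this
   sketch):  */

static int
example_baseaddr_len (int reg_is_bp_or_r13, int reg_is_sp_or_r12, long offset)
{
  int len = 4;				/* 32-bit displacement by default */

  if (offset == 0)
    len = reg_is_bp_or_r13 ? 1 : 0;	/* EBP/R13 cannot omit the disp */
  else if (offset >= -128 && offset <= 127)
    len = 1;				/* 8-bit displacement */

  if (reg_is_sp_or_r12)
    len++;				/* SIB byte required */

  return len;
}

/* For example, a zero offset from ESP costs 1 byte (the SIB byte only), while
   an offset of -200 from EBP costs the full 4-byte displacement.  */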
9714 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9715 The valid base registers are taken from CFUN->MACHINE->FS. */
9717 static rtx
9718 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9720 const struct machine_function *m = cfun->machine;
9721 rtx base_reg = NULL;
9722 HOST_WIDE_INT base_offset = 0;
9724 if (m->use_fast_prologue_epilogue)
9726 /* Choose the base register most likely to allow the most scheduling
9727 opportunities. Generally FP is valid throughout the function,
9728 while DRAP must be reloaded within the epilogue. But choose either
9729 over the SP due to increased encoding size. */
9731 if (m->fs.fp_valid)
9733 base_reg = hard_frame_pointer_rtx;
9734 base_offset = m->fs.fp_offset - cfa_offset;
9736 else if (m->fs.drap_valid)
9738 base_reg = crtl->drap_reg;
9739 base_offset = 0 - cfa_offset;
9741 else if (m->fs.sp_valid)
9743 base_reg = stack_pointer_rtx;
9744 base_offset = m->fs.sp_offset - cfa_offset;
9747 else
9749 HOST_WIDE_INT toffset;
9750 int len = 16, tlen;
9752 /* Choose the base register with the smallest address encoding.
9753 With a tie, choose FP > DRAP > SP. */
9754 if (m->fs.sp_valid)
9756 base_reg = stack_pointer_rtx;
9757 base_offset = m->fs.sp_offset - cfa_offset;
9758 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9760 if (m->fs.drap_valid)
9762 toffset = 0 - cfa_offset;
9763 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9764 if (tlen <= len)
9766 base_reg = crtl->drap_reg;
9767 base_offset = toffset;
9768 len = tlen;
9771 if (m->fs.fp_valid)
9773 toffset = m->fs.fp_offset - cfa_offset;
9774 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9775 if (tlen <= len)
9777 base_reg = hard_frame_pointer_rtx;
9778 base_offset = toffset;
9779 len = tlen;
9783 gcc_assert (base_reg != NULL);
9785 return plus_constant (Pmode, base_reg, base_offset);
9788 /* Emit code to save registers in the prologue. */
9790 static void
9791 ix86_emit_save_regs (void)
9793 unsigned int regno;
9794 rtx insn;
9796 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9797 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9799 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9800 RTX_FRAME_RELATED_P (insn) = 1;
9804 /* Emit a single register save at CFA - CFA_OFFSET. */
9806 static void
9807 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9808 HOST_WIDE_INT cfa_offset)
9810 struct machine_function *m = cfun->machine;
9811 rtx reg = gen_rtx_REG (mode, regno);
9812 rtx mem, addr, base, insn;
9814 addr = choose_baseaddr (cfa_offset);
9815 mem = gen_frame_mem (mode, addr);
9817 /* For SSE saves, we need to indicate the 128-bit alignment. */
9818 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9820 insn = emit_move_insn (mem, reg);
9821 RTX_FRAME_RELATED_P (insn) = 1;
9823 base = addr;
9824 if (GET_CODE (base) == PLUS)
9825 base = XEXP (base, 0);
9826 gcc_checking_assert (REG_P (base));
9828 /* When saving registers into a re-aligned local stack frame, avoid
9829 any tricky guessing by dwarf2out. */
9830 if (m->fs.realigned)
9832 gcc_checking_assert (stack_realign_drap);
9834 if (regno == REGNO (crtl->drap_reg))
9836 /* A bit of a hack. We force the DRAP register to be saved in
9837 the re-aligned stack frame, which provides us with a copy
9838 of the CFA that will last past the prologue. Install it. */
9839 gcc_checking_assert (cfun->machine->fs.fp_valid);
9840 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9841 cfun->machine->fs.fp_offset - cfa_offset);
9842 mem = gen_rtx_MEM (mode, addr);
9843 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9845 else
9847 /* The frame pointer is a stable reference within the
9848 aligned frame. Use it. */
9849 gcc_checking_assert (cfun->machine->fs.fp_valid);
9850 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9851 cfun->machine->fs.fp_offset - cfa_offset);
9852 mem = gen_rtx_MEM (mode, addr);
9853 add_reg_note (insn, REG_CFA_EXPRESSION,
9854 gen_rtx_SET (VOIDmode, mem, reg));
9858 /* The memory may not be relative to the current CFA register,
9859 which means that we may need to generate a new pattern for
9860 use by the unwind info. */
9861 else if (base != m->fs.cfa_reg)
9863 addr = plus_constant (Pmode, m->fs.cfa_reg,
9864 m->fs.cfa_offset - cfa_offset);
9865 mem = gen_rtx_MEM (mode, addr);
9866 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9870 /* Emit code to save registers using MOV insns.
9871 First register is stored at CFA - CFA_OFFSET. */
9872 static void
9873 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9875 unsigned int regno;
9877 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9878 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9880 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9881 cfa_offset -= UNITS_PER_WORD;
9885 /* Emit code to save SSE registers using MOV insns.
9886 First register is stored at CFA - CFA_OFFSET. */
9887 static void
9888 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9890 unsigned int regno;
9892 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9893 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9895 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9896 cfa_offset -= 16;
9900 static GTY(()) rtx queued_cfa_restores;
9902 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9903 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9904 Don't add the note if the previously saved value will be left untouched
9905 within the stack red zone until return, as unwinders can find the same value
9906 in the register and on the stack. */
9908 static void
9909 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9911 if (!crtl->shrink_wrapped
9912 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9913 return;
9915 if (insn)
9917 add_reg_note (insn, REG_CFA_RESTORE, reg);
9918 RTX_FRAME_RELATED_P (insn) = 1;
9920 else
9921 queued_cfa_restores
9922 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9925 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9927 static void
9928 ix86_add_queued_cfa_restore_notes (rtx insn)
9930 rtx last;
9931 if (!queued_cfa_restores)
9932 return;
9933 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9935 XEXP (last, 1) = REG_NOTES (insn);
9936 REG_NOTES (insn) = queued_cfa_restores;
9937 queued_cfa_restores = NULL_RTX;
9938 RTX_FRAME_RELATED_P (insn) = 1;
9941 /* Expand prologue or epilogue stack adjustment.
9942 The pattern exists to put a dependency on all ebp-based memory accesses.
9943 STYLE should be negative if instructions should be marked as frame related,
9944 zero if the %r11 register is live and cannot be freely used, and positive
9945 otherwise. */
9947 static void
9948 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9949 int style, bool set_cfa)
9951 struct machine_function *m = cfun->machine;
9952 rtx insn;
9953 bool add_frame_related_expr = false;
9955 if (Pmode == SImode)
9956 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9957 else if (x86_64_immediate_operand (offset, DImode))
9958 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9959 else
9961 rtx tmp;
9962 /* r11 is used by indirect sibcall return as well, set before the
9963 epilogue and used after the epilogue. */
9964 if (style)
9965 tmp = gen_rtx_REG (DImode, R11_REG);
9966 else
9968 gcc_assert (src != hard_frame_pointer_rtx
9969 && dest != hard_frame_pointer_rtx);
9970 tmp = hard_frame_pointer_rtx;
9972 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9973 if (style < 0)
9974 add_frame_related_expr = true;
9976 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9979 insn = emit_insn (insn);
9980 if (style >= 0)
9981 ix86_add_queued_cfa_restore_notes (insn);
9983 if (set_cfa)
9985 rtx r;
9987 gcc_assert (m->fs.cfa_reg == src);
9988 m->fs.cfa_offset += INTVAL (offset);
9989 m->fs.cfa_reg = dest;
9991 r = gen_rtx_PLUS (Pmode, src, offset);
9992 r = gen_rtx_SET (VOIDmode, dest, r);
9993 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9994 RTX_FRAME_RELATED_P (insn) = 1;
9996 else if (style < 0)
9998 RTX_FRAME_RELATED_P (insn) = 1;
9999 if (add_frame_related_expr)
10001 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10002 r = gen_rtx_SET (VOIDmode, dest, r);
10003 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10007 if (dest == stack_pointer_rtx)
10009 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10010 bool valid = m->fs.sp_valid;
10012 if (src == hard_frame_pointer_rtx)
10014 valid = m->fs.fp_valid;
10015 ooffset = m->fs.fp_offset;
10017 else if (src == crtl->drap_reg)
10019 valid = m->fs.drap_valid;
10020 ooffset = 0;
10022 else
10024 /* Otherwise there are two possibilities: SP itself, which we set
10025 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10026 taken care of by hand along the eh_return path. */
10027 gcc_checking_assert (src == stack_pointer_rtx
10028 || offset == const0_rtx);
10031 m->fs.sp_offset = ooffset - INTVAL (offset);
10032 m->fs.sp_valid = valid;
10036 /* Find an available register to be used as the dynamic realign argument
10037 pointer register. Such a register will be written in the prologue and
10038 used at the beginning of the body, so it must not be
10039 1. a parameter passing register.
10040 2. the GOT pointer.
10041 We reuse the static-chain register if it is available. Otherwise, we
10042 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10043 shorter encoding.
10045 Return: the regno of the chosen register. */
10047 static unsigned int
10048 find_drap_reg (void)
10050 tree decl = cfun->decl;
10052 if (TARGET_64BIT)
10054 /* Use R13 for a nested function or a function that needs a static chain.
10055 Since a function with a tail call may use any caller-saved
10056 register in the epilogue, DRAP must not use a caller-saved
10057 register in that case. */
10058 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10059 return R13_REG;
10061 return R10_REG;
10063 else
10065 /* Use DI for a nested function or a function that needs a static chain.
10066 Since a function with a tail call may use any caller-saved
10067 register in the epilogue, DRAP must not use a caller-saved
10068 register in that case. */
10069 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10070 return DI_REG;
10072 /* Reuse static chain register if it isn't used for parameter
10073 passing. */
10074 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10076 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10077 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10078 return CX_REG;
10080 return DI_REG;
10084 /* Return minimum incoming stack alignment. */
10086 static unsigned int
10087 ix86_minimum_incoming_stack_boundary (bool sibcall)
10089 unsigned int incoming_stack_boundary;
10091 /* Prefer the one specified at command line. */
10092 if (ix86_user_incoming_stack_boundary)
10093 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10094 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10095 if -mstackrealign is used, this isn't a sibcall check, and the
10096 estimated stack alignment is 128 bits. */
10097 else if (!sibcall
10098 && !TARGET_64BIT
10099 && ix86_force_align_arg_pointer
10100 && crtl->stack_alignment_estimated == 128)
10101 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10102 else
10103 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10105 /* Incoming stack alignment can be changed on individual functions
10106 via force_align_arg_pointer attribute. We use the smallest
10107 incoming stack boundary. */
10108 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10109 && lookup_attribute (ix86_force_align_arg_pointer_string,
10110 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10111 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10113 /* The incoming stack frame has to be aligned at least at
10114 parm_stack_boundary. */
10115 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10116 incoming_stack_boundary = crtl->parm_stack_boundary;
10118 /* The stack at the entrance of main is aligned by the runtime. We use
10119 the smallest incoming stack boundary. */
10120 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10121 && DECL_NAME (current_function_decl)
10122 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10123 && DECL_FILE_SCOPE_P (current_function_decl))
10124 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10126 return incoming_stack_boundary;
10129 /* Update incoming stack boundary and estimated stack alignment. */
10131 static void
10132 ix86_update_stack_boundary (void)
10134 ix86_incoming_stack_boundary
10135 = ix86_minimum_incoming_stack_boundary (false);
10137 /* x86_64 varargs need 16-byte stack alignment for the register save
10138 area. */
10139 if (TARGET_64BIT
10140 && cfun->stdarg
10141 && crtl->stack_alignment_estimated < 128)
10142 crtl->stack_alignment_estimated = 128;
10145 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10146 needed or an rtx for DRAP otherwise. */
10148 static rtx
10149 ix86_get_drap_rtx (void)
10151 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10152 crtl->need_drap = true;
10154 if (stack_realign_drap)
10156 /* Assign DRAP to vDRAP and return vDRAP. */
10157 unsigned int regno = find_drap_reg ();
10158 rtx drap_vreg;
10159 rtx arg_ptr;
10160 rtx seq, insn;
10162 arg_ptr = gen_rtx_REG (Pmode, regno);
10163 crtl->drap_reg = arg_ptr;
10165 start_sequence ();
10166 drap_vreg = copy_to_reg (arg_ptr);
10167 seq = get_insns ();
10168 end_sequence ();
10170 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10171 if (!optimize)
10173 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10174 RTX_FRAME_RELATED_P (insn) = 1;
10176 return drap_vreg;
10178 else
10179 return NULL;
10182 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10184 static rtx
10185 ix86_internal_arg_pointer (void)
10187 return virtual_incoming_args_rtx;
10190 struct scratch_reg {
10191 rtx reg;
10192 bool saved;
10195 /* Return a short-lived scratch register for use on function entry.
10196 In 32-bit mode, it is valid only after the registers are saved
10197 in the prologue. This register must be released by means of
10198 release_scratch_register_on_entry once it is dead. */
10200 static void
10201 get_scratch_register_on_entry (struct scratch_reg *sr)
10203 int regno;
10205 sr->saved = false;
10207 if (TARGET_64BIT)
10209 /* We always use R11 in 64-bit mode. */
10210 regno = R11_REG;
10212 else
10214 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10215 bool fastcall_p
10216 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10217 bool thiscall_p
10218 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10219 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10220 int regparm = ix86_function_regparm (fntype, decl);
10221 int drap_regno
10222 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10224 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10225 for the static chain register. */
10226 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10227 && drap_regno != AX_REG)
10228 regno = AX_REG;
10229 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10230 for the static chain register. */
10231 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10232 regno = AX_REG;
10233 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10234 regno = DX_REG;
10235 /* ecx is the static chain register. */
10236 else if (regparm < 3 && !fastcall_p && !thiscall_p
10237 && !static_chain_p
10238 && drap_regno != CX_REG)
10239 regno = CX_REG;
10240 else if (ix86_save_reg (BX_REG, true))
10241 regno = BX_REG;
10242 /* esi is the static chain register. */
10243 else if (!(regparm == 3 && static_chain_p)
10244 && ix86_save_reg (SI_REG, true))
10245 regno = SI_REG;
10246 else if (ix86_save_reg (DI_REG, true))
10247 regno = DI_REG;
10248 else
10250 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10251 sr->saved = true;
10255 sr->reg = gen_rtx_REG (Pmode, regno);
10256 if (sr->saved)
10258 rtx insn = emit_insn (gen_push (sr->reg));
10259 RTX_FRAME_RELATED_P (insn) = 1;
10263 /* Release a scratch register obtained from the preceding function. */
10265 static void
10266 release_scratch_register_on_entry (struct scratch_reg *sr)
10268 if (sr->saved)
10270 struct machine_function *m = cfun->machine;
10271 rtx x, insn = emit_insn (gen_pop (sr->reg));
10273 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10274 RTX_FRAME_RELATED_P (insn) = 1;
10275 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10276 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10277 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10278 m->fs.sp_offset -= UNITS_PER_WORD;
10282 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10284 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10286 static void
10287 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10289 /* We skip the probe for the first interval + a small dope of 4 words and
10290 probe that many bytes past the specified size to maintain a protection
10291 area at the bottom of the stack. */
10292 const int dope = 4 * UNITS_PER_WORD;
10293 rtx size_rtx = GEN_INT (size), last;
10295 /* See if we have a constant small number of probes to generate. If so,
10296 that's the easy case. The run-time loop is made up of 11 insns in the
10297 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10298 for n # of intervals. */
10299 if (size <= 5 * PROBE_INTERVAL)
10301 HOST_WIDE_INT i, adjust;
10302 bool first_probe = true;
10304 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10305 values of N from 1 until it exceeds SIZE. If only one probe is
10306 needed, this will not generate any code. Then adjust and probe
10307 to PROBE_INTERVAL + SIZE. */
10308 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10310 if (first_probe)
10312 adjust = 2 * PROBE_INTERVAL + dope;
10313 first_probe = false;
10315 else
10316 adjust = PROBE_INTERVAL;
10318 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10319 plus_constant (Pmode, stack_pointer_rtx,
10320 -adjust)));
10321 emit_stack_probe (stack_pointer_rtx);
10324 if (first_probe)
10325 adjust = size + PROBE_INTERVAL + dope;
10326 else
10327 adjust = size + PROBE_INTERVAL - i;
10329 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10330 plus_constant (Pmode, stack_pointer_rtx,
10331 -adjust)));
10332 emit_stack_probe (stack_pointer_rtx);
10334 /* Adjust back to account for the additional first interval. */
10335 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10336 plus_constant (Pmode, stack_pointer_rtx,
10337 PROBE_INTERVAL + dope)));
10340 /* Otherwise, do the same as above, but in a loop. Note that we must be
10341 extra careful with variables wrapping around because we might be at
10342 the very top (or the very bottom) of the address space and we have
10343 to be able to handle this case properly; in particular, we use an
10344 equality test for the loop condition. */
10345 else
10347 HOST_WIDE_INT rounded_size;
10348 struct scratch_reg sr;
10350 get_scratch_register_on_entry (&sr);
10353 /* Step 1: round SIZE to the previous multiple of the interval. */
10355 rounded_size = size & -PROBE_INTERVAL;
10358 /* Step 2: compute initial and final value of the loop counter. */
10360 /* SP = SP_0 + PROBE_INTERVAL. */
10361 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10362 plus_constant (Pmode, stack_pointer_rtx,
10363 - (PROBE_INTERVAL + dope))));
10365 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10366 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10367 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10368 gen_rtx_PLUS (Pmode, sr.reg,
10369 stack_pointer_rtx)));
10372 /* Step 3: the loop
10374 while (SP != LAST_ADDR)
10376 SP = SP + PROBE_INTERVAL
10377 probe at SP
10380 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10381 values of N from 1 until it is equal to ROUNDED_SIZE. */
10383 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10386 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10387 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10389 if (size != rounded_size)
10391 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10392 plus_constant (Pmode, stack_pointer_rtx,
10393 rounded_size - size)));
10394 emit_stack_probe (stack_pointer_rtx);
10397 /* Adjust back to account for the additional first interval. */
10398 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10399 plus_constant (Pmode, stack_pointer_rtx,
10400 PROBE_INTERVAL + dope)));
10402 release_scratch_register_on_entry (&sr);
10405 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10407 /* Even if the stack pointer isn't the CFA register, we need to correctly
10408 describe the adjustments made to it, in particular differentiate the
10409 frame-related ones from the frame-unrelated ones. */
10410 if (size > 0)
10412 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10413 XVECEXP (expr, 0, 0)
10414 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10415 plus_constant (Pmode, stack_pointer_rtx, -size));
10416 XVECEXP (expr, 0, 1)
10417 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10418 plus_constant (Pmode, stack_pointer_rtx,
10419 PROBE_INTERVAL + dope + size));
10420 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10421 RTX_FRAME_RELATED_P (last) = 1;
10423 cfun->machine->fs.sp_offset += size;
10426 /* Make sure nothing is scheduled before we are done. */
10427 emit_insn (gen_blockage ());
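/* Illustrative sketch (not part of the i386 back end).  For a small constant
   allocation (the first, non-looping case above) the function adjusts the
   stack pointer and probes once per interval, skipping the first interval
   plus the four-word dope and adjusting back at the end.  A host-side model
   of the offsets that end up being probed; the 4096-byte interval and the
   64-bit word size are assumptions of this sketch:  */

#include <stdio.h>

static void
example_probe_schedule (long size)
{
  const long probe_interval = 4096;	/* 1 << STACK_CHECK_PROBE_INTERVAL_EXP, typically */
  const long dope = 4 * 8;		/* four words on a 64-bit target */
  long sp = 0, i;
  int first_probe = 1;

  for (i = probe_interval; i < size; i += probe_interval)
    {
      sp -= first_probe ? 2 * probe_interval + dope : probe_interval;
      first_probe = 0;
      printf ("probe at sp%+ld\n", sp);
    }
  sp -= first_probe ? size + probe_interval + dope
		    : size + probe_interval - i;
  printf ("probe at sp%+ld\n", sp);
  sp += probe_interval + dope;		/* adjust back over the extra interval */
  printf ("sp ends at %+ld (i.e. -size)\n", sp);
}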
10430 /* Adjust the stack pointer up to REG while probing it. */
10432 const char *
10433 output_adjust_stack_and_probe (rtx reg)
10435 static int labelno = 0;
10436 char loop_lab[32], end_lab[32];
10437 rtx xops[2];
10439 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10440 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10442 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10444 /* Jump to END_LAB if SP == LAST_ADDR. */
10445 xops[0] = stack_pointer_rtx;
10446 xops[1] = reg;
10447 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10448 fputs ("\tje\t", asm_out_file);
10449 assemble_name_raw (asm_out_file, end_lab);
10450 fputc ('\n', asm_out_file);
10452 /* SP = SP + PROBE_INTERVAL. */
10453 xops[1] = GEN_INT (PROBE_INTERVAL);
10454 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10456 /* Probe at SP. */
10457 xops[1] = const0_rtx;
10458 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10460 fprintf (asm_out_file, "\tjmp\t");
10461 assemble_name_raw (asm_out_file, loop_lab);
10462 fputc ('\n', asm_out_file);
10464 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10466 return "";
10469 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10470 inclusive. These are offsets from the current stack pointer. */
10472 static void
10473 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10475 /* See if we have a constant small number of probes to generate. If so,
10476 that's the easy case. The run-time loop is made up of 7 insns in the
10477 generic case while the compile-time loop is made up of n insns for n #
10478 of intervals. */
10479 if (size <= 7 * PROBE_INTERVAL)
10481 HOST_WIDE_INT i;
10483 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10484 it exceeds SIZE. If only one probe is needed, this will not
10485 generate any code. Then probe at FIRST + SIZE. */
10486 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10487 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10488 -(first + i)));
10490 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10491 -(first + size)));
10494 /* Otherwise, do the same as above, but in a loop. Note that we must be
10495 extra careful with variables wrapping around because we might be at
10496 the very top (or the very bottom) of the address space and we have
10497 to be able to handle this case properly; in particular, we use an
10498 equality test for the loop condition. */
10499 else
10501 HOST_WIDE_INT rounded_size, last;
10502 struct scratch_reg sr;
10504 get_scratch_register_on_entry (&sr);
10507 /* Step 1: round SIZE to the previous multiple of the interval. */
10509 rounded_size = size & -PROBE_INTERVAL;
10512 /* Step 2: compute initial and final value of the loop counter. */
10514 /* TEST_OFFSET = FIRST. */
10515 emit_move_insn (sr.reg, GEN_INT (-first));
10517 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10518 last = first + rounded_size;
10521 /* Step 3: the loop
10523 while (TEST_ADDR != LAST_ADDR)
10525 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10526 probe at TEST_ADDR
10529 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10530 until it is equal to ROUNDED_SIZE. */
10532 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10535 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10536 that SIZE is equal to ROUNDED_SIZE. */
10538 if (size != rounded_size)
10539 emit_stack_probe (plus_constant (Pmode,
10540 gen_rtx_PLUS (Pmode,
10541 stack_pointer_rtx,
10542 sr.reg),
10543 rounded_size - size));
10545 release_scratch_register_on_entry (&sr);
10548 /* Make sure nothing is scheduled before we are done. */
10549 emit_insn (gen_blockage ());
10552 /* Probe a range of stack addresses from REG to END, inclusive. These are
10553 offsets from the current stack pointer. */
10555 const char *
10556 output_probe_stack_range (rtx reg, rtx end)
10558 static int labelno = 0;
10559 char loop_lab[32], end_lab[32];
10560 rtx xops[3];
10562 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10563 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10565 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10567 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10568 xops[0] = reg;
10569 xops[1] = end;
10570 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10571 fputs ("\tje\t", asm_out_file);
10572 assemble_name_raw (asm_out_file, end_lab);
10573 fputc ('\n', asm_out_file);
10575 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10576 xops[1] = GEN_INT (PROBE_INTERVAL);
10577 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10579 /* Probe at TEST_ADDR. */
10580 xops[0] = stack_pointer_rtx;
10581 xops[1] = reg;
10582 xops[2] = const0_rtx;
10583 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10585 fprintf (asm_out_file, "\tjmp\t");
10586 assemble_name_raw (asm_out_file, loop_lab);
10587 fputc ('\n', asm_out_file);
10589 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10591 return "";
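/* For illustration only: with a 4 KiB probe interval the routine above
   emits roughly (AT&T syntax, %r11 standing for the scratch register that
   holds the negated probe offset)

	.LPSRL1:
		cmpq	$-LAST, %r11
		je	.LPSRE1
		subq	$4096, %r11
		orq	$0, (%rsp,%r11)
		jmp	.LPSRL1
	.LPSRE1:

   Because the register holds negative offsets, subtracting the interval
   moves the probe address further below the stack pointer, which is why the
   "TEST_ADDR = TEST_ADDR + PROBE_INTERVAL" step is emitted as a SUB.  */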
10594 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10595 to be generated in correct form. */
10596 static void
10597 ix86_finalize_stack_realign_flags (void)
10599 /* Check if stack realignment is really needed after reload, and
10600 store the result in cfun. */
10601 unsigned int incoming_stack_boundary
10602 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10603 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10604 unsigned int stack_realign = (incoming_stack_boundary
10605 < (crtl->is_leaf
10606 ? crtl->max_used_stack_slot_alignment
10607 : crtl->stack_alignment_needed));
10609 if (crtl->stack_realign_finalized)
10611 /* After stack_realign_needed is finalized, we can no longer
10612 change it. */
10613 gcc_assert (crtl->stack_realign_needed == stack_realign);
10614 return;
10617 /* If the only reason for frame_pointer_needed is that we conservatively
10618 assumed stack realignment might be needed, but in the end nothing that
10619 needed the stack alignment had been spilled, clear frame_pointer_needed
10620 and say we don't need stack realignment. */
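/* A typical case reaching this point is a small leaf function compiled
   with -fomit-frame-pointer whose aligned temporaries were all allocated
   to registers, so nothing that actually needs the larger alignment ever
   touches the stack.  */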
10621 if (stack_realign
10622 && frame_pointer_needed
10623 && crtl->is_leaf
10624 && flag_omit_frame_pointer
10625 && crtl->sp_is_unchanging
10626 && !ix86_current_function_calls_tls_descriptor
10627 && !crtl->accesses_prior_frames
10628 && !cfun->calls_alloca
10629 && !crtl->calls_eh_return
10630 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10631 && !ix86_frame_pointer_required ()
10632 && get_frame_size () == 0
10633 && ix86_nsaved_sseregs () == 0
10634 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10636 HARD_REG_SET set_up_by_prologue, prologue_used;
10637 basic_block bb;
10639 CLEAR_HARD_REG_SET (prologue_used);
10640 CLEAR_HARD_REG_SET (set_up_by_prologue);
10641 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10642 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10643 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10644 HARD_FRAME_POINTER_REGNUM);
10645 FOR_EACH_BB_FN (bb, cfun)
10647 rtx insn;
10648 FOR_BB_INSNS (bb, insn)
10649 if (NONDEBUG_INSN_P (insn)
10650 && requires_stack_frame_p (insn, prologue_used,
10651 set_up_by_prologue))
10653 crtl->stack_realign_needed = stack_realign;
10654 crtl->stack_realign_finalized = true;
10655 return;
10659 /* If drap has been set, but it actually isn't live at the start
10660 of the function, there is no reason to set it up. */
10661 if (crtl->drap_reg)
10663 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10664 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10666 crtl->drap_reg = NULL_RTX;
10667 crtl->need_drap = false;
10670 else
10671 cfun->machine->no_drap_save_restore = true;
10673 frame_pointer_needed = false;
10674 stack_realign = false;
10675 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10676 crtl->stack_alignment_needed = incoming_stack_boundary;
10677 crtl->stack_alignment_estimated = incoming_stack_boundary;
10678 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10679 crtl->preferred_stack_boundary = incoming_stack_boundary;
10680 df_finish_pass (true);
10681 df_scan_alloc (NULL);
10682 df_scan_blocks ();
10683 df_compute_regs_ever_live (true);
10684 df_analyze ();
10687 crtl->stack_realign_needed = stack_realign;
10688 crtl->stack_realign_finalized = true;
10691 /* Expand the prologue into a bunch of separate insns. */
10693 void
10694 ix86_expand_prologue (void)
10696 struct machine_function *m = cfun->machine;
10697 rtx insn, t;
10698 bool pic_reg_used;
10699 struct ix86_frame frame;
10700 HOST_WIDE_INT allocate;
10701 bool int_registers_saved;
10702 bool sse_registers_saved;
10704 ix86_finalize_stack_realign_flags ();
10706 /* DRAP should not coexist with stack_realign_fp */
10707 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10709 memset (&m->fs, 0, sizeof (m->fs));
10711 /* Initialize CFA state for before the prologue. */
10712 m->fs.cfa_reg = stack_pointer_rtx;
10713 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10715 /* Track SP offset to the CFA. We continue tracking this after we've
10716 swapped the CFA register away from SP. In the case of re-alignment
10717 this is fudged; we're interested in offsets within the local frame. */
10718 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10719 m->fs.sp_valid = true;
10721 ix86_compute_frame_layout (&frame);
10723 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10725 /* We should have already generated an error for any use of
10726 ms_hook on a nested function. */
10727 gcc_checking_assert (!ix86_static_chain_on_stack);
10729 /* Check whether profiling is active and we shall use the
10730 profiling-before-prologue variant. If so, sorry. */
10731 if (crtl->profile && flag_fentry != 0)
10732 sorry ("ms_hook_prologue attribute isn%'t compatible "
10733 "with -mfentry for 32-bit");
10735 /* In ix86_asm_output_function_label we emitted:
10736 8b ff movl.s %edi,%edi
10737 55 push %ebp
10738 8b ec movl.s %esp,%ebp
10740 This matches the hookable function prologue in Win32 API
10741 functions in Microsoft Windows XP Service Pack 2 and newer.
10742 Wine uses this to enable Windows apps to hook the Win32 API
10743 functions provided by Wine.
10745 What that means is that we've already set up the frame pointer. */
10747 if (frame_pointer_needed
10748 && !(crtl->drap_reg && crtl->stack_realign_needed))
10750 rtx push, mov;
10752 /* We've decided to use the frame pointer already set up.
10753 Describe this to the unwinder by pretending that both
10754 push and mov insns happen right here.
10756 Putting the unwind info here at the end of the ms_hook
10757 is done so that we can make absolutely certain we get
10758 the required byte sequence at the start of the function,
10759 rather than relying on an assembler that can produce
10760 the exact encoding required.
10762 However it does mean (in the unpatched case) that we have
10763 a 1 insn window where the asynchronous unwind info is
10764 incorrect. However, if we placed the unwind info at
10765 its correct location we would have incorrect unwind info
10766 in the patched case. Which is probably all moot since
10767 I don't expect Wine generates dwarf2 unwind info for the
10768 system libraries that use this feature. */
10770 insn = emit_insn (gen_blockage ());
10772 push = gen_push (hard_frame_pointer_rtx);
10773 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10774 stack_pointer_rtx);
10775 RTX_FRAME_RELATED_P (push) = 1;
10776 RTX_FRAME_RELATED_P (mov) = 1;
10778 RTX_FRAME_RELATED_P (insn) = 1;
10779 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10780 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10782 /* Note that gen_push incremented m->fs.cfa_offset, even
10783 though we didn't emit the push insn here. */
10784 m->fs.cfa_reg = hard_frame_pointer_rtx;
10785 m->fs.fp_offset = m->fs.cfa_offset;
10786 m->fs.fp_valid = true;
10788 else
10790 /* The frame pointer is not needed so pop %ebp again.
10791 This leaves us with a pristine state. */
10792 emit_insn (gen_pop (hard_frame_pointer_rtx));
10796 /* The first insn of a function that accepts its static chain on the
10797 stack is to push the register that would be filled in by a direct
10798 call. This insn will be skipped by the trampoline. */
10799 else if (ix86_static_chain_on_stack)
10801 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10802 emit_insn (gen_blockage ());
10804 /* We don't want to interpret this push insn as a register save,
10805 only as a stack adjustment. The real copy of the register as
10806 a save will be done later, if needed. */
10807 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10808 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10809 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10810 RTX_FRAME_RELATED_P (insn) = 1;
10813 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10814 DRAP is needed and stack realignment is really needed after reload. */
10815 if (stack_realign_drap)
10817 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10819 /* Only need to push parameter pointer reg if it is caller saved. */
10820 if (!call_used_regs[REGNO (crtl->drap_reg)])
10822 /* Push arg pointer reg */
10823 insn = emit_insn (gen_push (crtl->drap_reg));
10824 RTX_FRAME_RELATED_P (insn) = 1;
10827 /* Grab the argument pointer. */
10828 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10829 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10830 RTX_FRAME_RELATED_P (insn) = 1;
10831 m->fs.cfa_reg = crtl->drap_reg;
10832 m->fs.cfa_offset = 0;
10834 /* Align the stack. */
10835 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10836 stack_pointer_rtx,
10837 GEN_INT (-align_bytes)));
10838 RTX_FRAME_RELATED_P (insn) = 1;
10840 /* Replicate the return address on the stack so that the return
10841 address can be reached via the (argp - 1) slot. This is needed
10842 to implement macro RETURN_ADDR_RTX and intrinsic function
10843 expand_builtin_return_addr etc. */
10844 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10845 t = gen_frame_mem (word_mode, t);
10846 insn = emit_insn (gen_push (t));
10847 RTX_FRAME_RELATED_P (insn) = 1;
10849 /* For the purposes of frame and register save area addressing,
10850 we've started over with a new frame. */
10851 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10852 m->fs.realigned = true;
10855 int_registers_saved = (frame.nregs == 0);
10856 sse_registers_saved = (frame.nsseregs == 0);
10858 if (frame_pointer_needed && !m->fs.fp_valid)
10860 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10861 slower on all targets. Also sdb doesn't like it. */
10862 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10863 RTX_FRAME_RELATED_P (insn) = 1;
10865 /* Push registers now, before setting the frame pointer
10866 on SEH target. */
10867 if (!int_registers_saved
10868 && TARGET_SEH
10869 && !frame.save_regs_using_mov)
10871 ix86_emit_save_regs ();
10872 int_registers_saved = true;
10873 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10876 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10878 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10879 RTX_FRAME_RELATED_P (insn) = 1;
10881 if (m->fs.cfa_reg == stack_pointer_rtx)
10882 m->fs.cfa_reg = hard_frame_pointer_rtx;
10883 m->fs.fp_offset = m->fs.sp_offset;
10884 m->fs.fp_valid = true;
10888 if (!int_registers_saved)
10890 /* If saving registers via PUSH, do so now. */
10891 if (!frame.save_regs_using_mov)
10893 ix86_emit_save_regs ();
10894 int_registers_saved = true;
10895 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10898 /* When using the red zone we may start register saving before allocating
10899 the stack frame, saving one cycle of the prologue. However, avoid
10900 doing this if we have to probe the stack; at least on x86_64 the
10901 stack probe can turn into a call that clobbers a red zone location. */
10902 else if (ix86_using_red_zone ()
10903 && (! TARGET_STACK_PROBE
10904 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10906 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10907 int_registers_saved = true;
10911 if (stack_realign_fp)
10913 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10914 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10916 /* The computation of the size of the re-aligned stack frame means
10917 that we must allocate the size of the register save area before
10918 performing the actual alignment. Otherwise we cannot guarantee
10919 that there's enough storage above the realignment point. */
10920 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10921 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10922 GEN_INT (m->fs.sp_offset
10923 - frame.sse_reg_save_offset),
10924 -1, false);
10926 /* Align the stack. */
10927 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10928 stack_pointer_rtx,
10929 GEN_INT (-align_bytes)));
10931 /* For the purposes of register save area addressing, the stack
10932 pointer is no longer valid. As for the value of sp_offset,
10933 see ix86_compute_frame_layout, which we need to match in order
10934 to pass verification of stack_pointer_offset at the end. */
10935 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10936 m->fs.sp_valid = false;
10939 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10941 if (flag_stack_usage_info)
10943 /* We start to count from ARG_POINTER. */
10944 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10946 /* If it was realigned, take into account the fake frame. */
10947 if (stack_realign_drap)
10949 if (ix86_static_chain_on_stack)
10950 stack_size += UNITS_PER_WORD;
10952 if (!call_used_regs[REGNO (crtl->drap_reg)])
10953 stack_size += UNITS_PER_WORD;
10955 /* This over-estimates by 1 minimal-stack-alignment-unit but
10956 mitigates that by counting in the new return address slot. */
10957 current_function_dynamic_stack_size
10958 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10961 current_function_static_stack_size = stack_size;
10964 /* On SEH target with very large frame size, allocate an area to save
10965 SSE registers (as the very large allocation won't be described). */
10966 if (TARGET_SEH
10967 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10968 && !sse_registers_saved)
10970 HOST_WIDE_INT sse_size =
10971 frame.sse_reg_save_offset - frame.reg_save_offset;
10973 gcc_assert (int_registers_saved);
10975 /* No need to do stack checking as the area will be immediately
10976 written. */
10977 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10978 GEN_INT (-sse_size), -1,
10979 m->fs.cfa_reg == stack_pointer_rtx);
10980 allocate -= sse_size;
10981 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10982 sse_registers_saved = true;
10985 /* The stack has already been decremented by the instruction calling us
10986 so probe if the size is non-negative to preserve the protection area. */
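/* Overview of the two strategies below: with STACK_CHECK_MOVING_SP the
   allocation and the probing are folded together by
   ix86_adjust_stack_and_probe and ALLOCATE is cleared afterwards;
   otherwise the range is only probed here and the actual stack
   adjustment is still performed further down.  */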
10987 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10989 /* We expect the registers to be saved when probes are used. */
10990 gcc_assert (int_registers_saved);
10992 if (STACK_CHECK_MOVING_SP)
10994 if (!(crtl->is_leaf && !cfun->calls_alloca
10995 && allocate <= PROBE_INTERVAL))
10997 ix86_adjust_stack_and_probe (allocate);
10998 allocate = 0;
11001 else
11003 HOST_WIDE_INT size = allocate;
11005 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11006 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11008 if (TARGET_STACK_PROBE)
11010 if (crtl->is_leaf && !cfun->calls_alloca)
11012 if (size > PROBE_INTERVAL)
11013 ix86_emit_probe_stack_range (0, size);
11015 else
11016 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11018 else
11020 if (crtl->is_leaf && !cfun->calls_alloca)
11022 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11023 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11024 size - STACK_CHECK_PROTECT);
11026 else
11027 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11032 if (allocate == 0)
11034 else if (!ix86_target_stack_probe ()
11035 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11037 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11038 GEN_INT (-allocate), -1,
11039 m->fs.cfa_reg == stack_pointer_rtx);
11041 else
11043 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11044 rtx r10 = NULL;
11045 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11046 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11047 bool eax_live = ix86_eax_live_at_start_p ();
11048 bool r10_live = false;
11050 if (TARGET_64BIT)
11051 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11053 if (eax_live)
11055 insn = emit_insn (gen_push (eax));
11056 allocate -= UNITS_PER_WORD;
11057 /* Note that SEH directives need to continue tracking the stack
11058 pointer even after the frame pointer has been set up. */
11059 if (sp_is_cfa_reg || TARGET_SEH)
11061 if (sp_is_cfa_reg)
11062 m->fs.cfa_offset += UNITS_PER_WORD;
11063 RTX_FRAME_RELATED_P (insn) = 1;
11067 if (r10_live)
11069 r10 = gen_rtx_REG (Pmode, R10_REG);
11070 insn = emit_insn (gen_push (r10));
11071 allocate -= UNITS_PER_WORD;
11072 if (sp_is_cfa_reg || TARGET_SEH)
11074 if (sp_is_cfa_reg)
11075 m->fs.cfa_offset += UNITS_PER_WORD;
11076 RTX_FRAME_RELATED_P (insn) = 1;
11080 emit_move_insn (eax, GEN_INT (allocate));
11081 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11083 /* Use the fact that AX still contains ALLOCATE. */
11084 adjust_stack_insn = (Pmode == DImode
11085 ? gen_pro_epilogue_adjust_stack_di_sub
11086 : gen_pro_epilogue_adjust_stack_si_sub);
11088 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11089 stack_pointer_rtx, eax));
11091 if (sp_is_cfa_reg || TARGET_SEH)
11093 if (sp_is_cfa_reg)
11094 m->fs.cfa_offset += allocate;
11095 RTX_FRAME_RELATED_P (insn) = 1;
11096 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11097 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11098 plus_constant (Pmode, stack_pointer_rtx,
11099 -allocate)));
11101 m->fs.sp_offset += allocate;
11103 /* Use stack_pointer_rtx for relative addressing so that code
11104 works for realigned stack, too. */
11105 if (r10_live && eax_live)
11107 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11108 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11109 gen_frame_mem (word_mode, t));
11110 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11111 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11112 gen_frame_mem (word_mode, t));
11114 else if (eax_live || r10_live)
11116 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11117 emit_move_insn (gen_rtx_REG (word_mode,
11118 (eax_live ? AX_REG : R10_REG)),
11119 gen_frame_mem (word_mode, t));
11122 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11124 /* If we haven't already set up the frame pointer, do so now. */
11125 if (frame_pointer_needed && !m->fs.fp_valid)
11127 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11128 GEN_INT (frame.stack_pointer_offset
11129 - frame.hard_frame_pointer_offset));
11130 insn = emit_insn (insn);
11131 RTX_FRAME_RELATED_P (insn) = 1;
11132 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11134 if (m->fs.cfa_reg == stack_pointer_rtx)
11135 m->fs.cfa_reg = hard_frame_pointer_rtx;
11136 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11137 m->fs.fp_valid = true;
11140 if (!int_registers_saved)
11141 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11142 if (!sse_registers_saved)
11143 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11145 pic_reg_used = false;
11146 /* We don't use pic-register for pe-coff target. */
11147 if (pic_offset_table_rtx
11148 && !TARGET_PECOFF
11149 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11150 || crtl->profile))
11152 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11154 if (alt_pic_reg_used != INVALID_REGNUM)
11155 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11157 pic_reg_used = true;
11160 if (pic_reg_used)
11162 if (TARGET_64BIT)
11164 if (ix86_cmodel == CM_LARGE_PIC)
11166 rtx label, tmp_reg;
11168 gcc_assert (Pmode == DImode);
11169 label = gen_label_rtx ();
11170 emit_label (label);
11171 LABEL_PRESERVE_P (label) = 1;
11172 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11173 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11174 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11175 label));
11176 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11177 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11178 pic_offset_table_rtx, tmp_reg));
11180 else
11181 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11183 else
11185 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11186 RTX_FRAME_RELATED_P (insn) = 1;
11187 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11191 /* In the pic_reg_used case, make sure that the got load isn't deleted
11192 when mcount needs it. Blockage to avoid call movement across mcount
11193 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11194 note. */
11195 if (crtl->profile && !flag_fentry && pic_reg_used)
11196 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11198 if (crtl->drap_reg && !crtl->stack_realign_needed)
11200 /* vDRAP is set up, but after reload it turns out stack realignment
11201 isn't necessary; here we emit prologue code to set up DRAP
11202 without the stack realignment adjustment. */
11203 t = choose_baseaddr (0);
11204 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11207 /* Prevent instructions from being scheduled into register save push
11208 sequence when access to the redzone area is done through frame pointer.
11209 The offset between the frame pointer and the stack pointer is calculated
11210 relative to the value of the stack pointer at the end of the function
11211 prologue, and moving instructions that access redzone area via frame
11212 pointer inside push sequence violates this assumption. */
11213 if (frame_pointer_needed && frame.red_zone_size)
11214 emit_insn (gen_memory_blockage ());
11216 /* Emit cld instruction if stringops are used in the function. */
11217 if (TARGET_CLD && ix86_current_function_needs_cld)
11218 emit_insn (gen_cld ());
11220 /* SEH requires that the prologue end within 256 bytes of the start of
11221 the function. Prevent instruction schedules that would extend that.
11222 Further, prevent alloca modifications to the stack pointer from being
11223 combined with prologue modifications. */
11224 if (TARGET_SEH)
11225 emit_insn (gen_prologue_use (stack_pointer_rtx));
11228 /* Emit code to restore REG using a POP insn. */
11230 static void
11231 ix86_emit_restore_reg_using_pop (rtx reg)
11233 struct machine_function *m = cfun->machine;
11234 rtx insn = emit_insn (gen_pop (reg));
11236 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11237 m->fs.sp_offset -= UNITS_PER_WORD;
11239 if (m->fs.cfa_reg == crtl->drap_reg
11240 && REGNO (reg) == REGNO (crtl->drap_reg))
11242 /* Previously we'd represented the CFA as an expression
11243 like *(%ebp - 8). We've just popped that value from
11244 the stack, which means we need to reset the CFA to
11245 the drap register. This will remain until we restore
11246 the stack pointer. */
11247 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11248 RTX_FRAME_RELATED_P (insn) = 1;
11250 /* This means that the DRAP register is valid for addressing too. */
11251 m->fs.drap_valid = true;
11252 return;
11255 if (m->fs.cfa_reg == stack_pointer_rtx)
11257 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11258 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11259 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11260 RTX_FRAME_RELATED_P (insn) = 1;
11262 m->fs.cfa_offset -= UNITS_PER_WORD;
11265 /* When the frame pointer is the CFA, and we pop it, we are
11266 swapping back to the stack pointer as the CFA. This happens
11267 for stack frames that don't allocate other data, so we assume
11268 the stack pointer is now pointing at the return address, i.e.
11269 the function entry state, which makes the offset one word. */
11270 if (reg == hard_frame_pointer_rtx)
11272 m->fs.fp_valid = false;
11273 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11275 m->fs.cfa_reg = stack_pointer_rtx;
11276 m->fs.cfa_offset -= UNITS_PER_WORD;
11278 add_reg_note (insn, REG_CFA_DEF_CFA,
11279 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11280 GEN_INT (m->fs.cfa_offset)));
11281 RTX_FRAME_RELATED_P (insn) = 1;
11286 /* Emit code to restore saved registers using POP insns. */
11288 static void
11289 ix86_emit_restore_regs_using_pop (void)
11291 unsigned int regno;
11293 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11294 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11295 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11298 /* Emit code and notes for the LEAVE instruction. */
11300 static void
11301 ix86_emit_leave (void)
11303 struct machine_function *m = cfun->machine;
11304 rtx insn = emit_insn (ix86_gen_leave ());
11306 ix86_add_queued_cfa_restore_notes (insn);
11308 gcc_assert (m->fs.fp_valid);
11309 m->fs.sp_valid = true;
11310 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11311 m->fs.fp_valid = false;
11313 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11315 m->fs.cfa_reg = stack_pointer_rtx;
11316 m->fs.cfa_offset = m->fs.sp_offset;
11318 add_reg_note (insn, REG_CFA_DEF_CFA,
11319 plus_constant (Pmode, stack_pointer_rtx,
11320 m->fs.sp_offset));
11321 RTX_FRAME_RELATED_P (insn) = 1;
11323 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11324 m->fs.fp_offset);
11327 /* Emit code to restore saved registers using MOV insns.
11328 First register is restored from CFA - CFA_OFFSET. */
11329 static void
11330 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11331 bool maybe_eh_return)
11333 struct machine_function *m = cfun->machine;
11334 unsigned int regno;
11336 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11337 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11339 rtx reg = gen_rtx_REG (word_mode, regno);
11340 rtx insn, mem;
11342 mem = choose_baseaddr (cfa_offset);
11343 mem = gen_frame_mem (word_mode, mem);
11344 insn = emit_move_insn (reg, mem);
11346 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11348 /* Previously we'd represented the CFA as an expression
11349 like *(%ebp - 8). We've just popped that value from
11350 the stack, which means we need to reset the CFA to
11351 the drap register. This will remain until we restore
11352 the stack pointer. */
11353 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11354 RTX_FRAME_RELATED_P (insn) = 1;
11356 /* This means that the DRAP register is valid for addressing. */
11357 m->fs.drap_valid = true;
11359 else
11360 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11362 cfa_offset -= UNITS_PER_WORD;
11366 /* Emit code to restore saved SSE registers using MOV insns.
11367 The first register is restored from CFA - CFA_OFFSET. */
11368 static void
11369 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11370 bool maybe_eh_return)
11372 unsigned int regno;
11374 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11375 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11377 rtx reg = gen_rtx_REG (V4SFmode, regno);
11378 rtx mem;
11380 mem = choose_baseaddr (cfa_offset);
11381 mem = gen_rtx_MEM (V4SFmode, mem);
11382 set_mem_align (mem, 128);
11383 emit_move_insn (reg, mem);
11385 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11387 cfa_offset -= 16;
11391 /* Restore function stack, frame, and registers. */
11393 void
11394 ix86_expand_epilogue (int style)
11396 struct machine_function *m = cfun->machine;
11397 struct machine_frame_state frame_state_save = m->fs;
11398 struct ix86_frame frame;
11399 bool restore_regs_via_mov;
11400 bool using_drap;
11402 ix86_finalize_stack_realign_flags ();
11403 ix86_compute_frame_layout (&frame);
11405 m->fs.sp_valid = (!frame_pointer_needed
11406 || (crtl->sp_is_unchanging
11407 && !stack_realign_fp));
11408 gcc_assert (!m->fs.sp_valid
11409 || m->fs.sp_offset == frame.stack_pointer_offset);
11411 /* The FP must be valid if the frame pointer is present. */
11412 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11413 gcc_assert (!m->fs.fp_valid
11414 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11416 /* We must have *some* valid pointer to the stack frame. */
11417 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11419 /* The DRAP is never valid at this point. */
11420 gcc_assert (!m->fs.drap_valid);
11422 /* See the comment about red zone and frame
11423 pointer usage in ix86_expand_prologue. */
11424 if (frame_pointer_needed && frame.red_zone_size)
11425 emit_insn (gen_memory_blockage ());
11427 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11428 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11430 /* Determine the CFA offset of the end of the red-zone. */
11431 m->fs.red_zone_offset = 0;
11432 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11434 /* The red-zone begins below the return address. */
11435 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11437 /* When the register save area is in the aligned portion of
11438 the stack, determine the maximum runtime displacement that
11439 matches up with the aligned frame. */
11440 if (stack_realign_drap)
11441 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11442 + UNITS_PER_WORD);
11445 /* Special care must be taken for the normal return case of a function
11446 using eh_return: the eax and edx registers are marked as saved, but
11447 not restored along this path. Adjust the save location to match. */
11448 if (crtl->calls_eh_return && style != 2)
11449 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11451 /* EH_RETURN requires the use of moves to function properly. */
11452 if (crtl->calls_eh_return)
11453 restore_regs_via_mov = true;
11454 /* SEH requires the use of pops to identify the epilogue. */
11455 else if (TARGET_SEH)
11456 restore_regs_via_mov = false;
11457 /* If we're only restoring one register and sp is not valid then
11458 use a move instruction to restore the register, since it's
11459 less work than reloading sp and popping the register. */
11460 else if (!m->fs.sp_valid && frame.nregs <= 1)
11461 restore_regs_via_mov = true;
11462 else if (TARGET_EPILOGUE_USING_MOVE
11463 && cfun->machine->use_fast_prologue_epilogue
11464 && (frame.nregs > 1
11465 || m->fs.sp_offset != frame.reg_save_offset))
11466 restore_regs_via_mov = true;
11467 else if (frame_pointer_needed
11468 && !frame.nregs
11469 && m->fs.sp_offset != frame.reg_save_offset)
11470 restore_regs_via_mov = true;
11471 else if (frame_pointer_needed
11472 && TARGET_USE_LEAVE
11473 && cfun->machine->use_fast_prologue_epilogue
11474 && frame.nregs == 1)
11475 restore_regs_via_mov = true;
11476 else
11477 restore_regs_via_mov = false;
11479 if (restore_regs_via_mov || frame.nsseregs)
11481 /* Ensure that the entire register save area is addressable via
11482 the stack pointer, if we will restore via sp. */
11483 if (TARGET_64BIT
11484 && m->fs.sp_offset > 0x7fffffff
11485 && !(m->fs.fp_valid || m->fs.drap_valid)
11486 && (frame.nsseregs + frame.nregs) != 0)
11488 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11489 GEN_INT (m->fs.sp_offset
11490 - frame.sse_reg_save_offset),
11491 style,
11492 m->fs.cfa_reg == stack_pointer_rtx);
11496 /* If there are any SSE registers to restore, then we have to do it
11497 via moves, since there's obviously no pop for SSE regs. */
11498 if (frame.nsseregs)
11499 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11500 style == 2);
11502 if (restore_regs_via_mov)
11504 rtx t;
11506 if (frame.nregs)
11507 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11509 /* eh_return epilogues need %ecx added to the stack pointer. */
11510 if (style == 2)
11512 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11514 /* Stack align doesn't work with eh_return. */
11515 gcc_assert (!stack_realign_drap);
11516 /* Neither do regparm nested functions. */
11517 gcc_assert (!ix86_static_chain_on_stack);
11519 if (frame_pointer_needed)
11521 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11522 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11523 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11525 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11526 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11528 /* Note that we use SA as a temporary CFA, as the return
11529 address is at the proper place relative to it. We
11530 pretend this happens at the FP restore insn because
11531 prior to this insn the FP would be stored at the wrong
11532 offset relative to SA, and after this insn we have no
11533 other reasonable register to use for the CFA. We don't
11534 bother resetting the CFA to the SP for the duration of
11535 the return insn. */
11536 add_reg_note (insn, REG_CFA_DEF_CFA,
11537 plus_constant (Pmode, sa, UNITS_PER_WORD));
11538 ix86_add_queued_cfa_restore_notes (insn);
11539 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11540 RTX_FRAME_RELATED_P (insn) = 1;
11542 m->fs.cfa_reg = sa;
11543 m->fs.cfa_offset = UNITS_PER_WORD;
11544 m->fs.fp_valid = false;
11546 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11547 const0_rtx, style, false);
11549 else
11551 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11552 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11553 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11554 ix86_add_queued_cfa_restore_notes (insn);
11556 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11557 if (m->fs.cfa_offset != UNITS_PER_WORD)
11559 m->fs.cfa_offset = UNITS_PER_WORD;
11560 add_reg_note (insn, REG_CFA_DEF_CFA,
11561 plus_constant (Pmode, stack_pointer_rtx,
11562 UNITS_PER_WORD));
11563 RTX_FRAME_RELATED_P (insn) = 1;
11566 m->fs.sp_offset = UNITS_PER_WORD;
11567 m->fs.sp_valid = true;
11570 else
11572 /* SEH requires that the function end with (1) a stack adjustment
11573 if necessary, (2) a sequence of pops, and (3) a return or
11574 jump instruction. Prevent insns from the function body from
11575 being scheduled into this sequence. */
11576 if (TARGET_SEH)
11578 /* Prevent a catch region from being adjacent to the standard
11579 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11580 several other flags that would be interesting to test are
11581 yet set up. */
11582 if (flag_non_call_exceptions)
11583 emit_insn (gen_nops (const1_rtx));
11584 else
11585 emit_insn (gen_blockage ());
11588 /* The first step is to deallocate the stack frame so that we can
11589 pop the registers. Also do it on SEH targets for very large
11590 frames, as the emitted instructions aren't allowed by the ABI in
11591 epilogues. */
11592 if (!m->fs.sp_valid
11593 || (TARGET_SEH
11594 && (m->fs.sp_offset - frame.reg_save_offset
11595 >= SEH_MAX_FRAME_SIZE)))
11597 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11598 GEN_INT (m->fs.fp_offset
11599 - frame.reg_save_offset),
11600 style, false);
11602 else if (m->fs.sp_offset != frame.reg_save_offset)
11604 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11605 GEN_INT (m->fs.sp_offset
11606 - frame.reg_save_offset),
11607 style,
11608 m->fs.cfa_reg == stack_pointer_rtx);
11611 ix86_emit_restore_regs_using_pop ();
11614 /* If we used a frame pointer and haven't already got rid of it,
11615 then do so now. */
11616 if (m->fs.fp_valid)
11618 /* If the stack pointer is valid and pointing at the frame
11619 pointer store address, then we only need a pop. */
11620 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11621 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11622 /* Leave results in shorter dependency chains on CPUs that are
11623 able to grok it fast. */
11624 else if (TARGET_USE_LEAVE
11625 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11626 || !cfun->machine->use_fast_prologue_epilogue)
11627 ix86_emit_leave ();
11628 else
11630 pro_epilogue_adjust_stack (stack_pointer_rtx,
11631 hard_frame_pointer_rtx,
11632 const0_rtx, style, !using_drap);
11633 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11637 if (using_drap)
11639 int param_ptr_offset = UNITS_PER_WORD;
11640 rtx insn;
11642 gcc_assert (stack_realign_drap);
11644 if (ix86_static_chain_on_stack)
11645 param_ptr_offset += UNITS_PER_WORD;
11646 if (!call_used_regs[REGNO (crtl->drap_reg)])
11647 param_ptr_offset += UNITS_PER_WORD;
11649 insn = emit_insn (gen_rtx_SET
11650 (VOIDmode, stack_pointer_rtx,
11651 gen_rtx_PLUS (Pmode,
11652 crtl->drap_reg,
11653 GEN_INT (-param_ptr_offset))));
11654 m->fs.cfa_reg = stack_pointer_rtx;
11655 m->fs.cfa_offset = param_ptr_offset;
11656 m->fs.sp_offset = param_ptr_offset;
11657 m->fs.realigned = false;
11659 add_reg_note (insn, REG_CFA_DEF_CFA,
11660 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11661 GEN_INT (param_ptr_offset)));
11662 RTX_FRAME_RELATED_P (insn) = 1;
11664 if (!call_used_regs[REGNO (crtl->drap_reg)])
11665 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11668 /* At this point the stack pointer must be valid, and we must have
11669 restored all of the registers. We may not have deallocated the
11670 entire stack frame. We've delayed this until now because it may
11671 be possible to merge the local stack deallocation with the
11672 deallocation forced by ix86_static_chain_on_stack. */
11673 gcc_assert (m->fs.sp_valid);
11674 gcc_assert (!m->fs.fp_valid);
11675 gcc_assert (!m->fs.realigned);
11676 if (m->fs.sp_offset != UNITS_PER_WORD)
11678 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11679 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11680 style, true);
11682 else
11683 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11685 /* Sibcall epilogues don't want a return instruction. */
11686 if (style == 0)
11688 m->fs = frame_state_save;
11689 return;
11692 if (crtl->args.pops_args && crtl->args.size)
11694 rtx popc = GEN_INT (crtl->args.pops_args);
11696 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11697 address, do explicit add, and jump indirectly to the caller. */
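/* Illustrative sequence for this case (roughly, in AT&T syntax):
	popl	%ecx
	addl	$pops_args, %esp
	jmp	*%ecx
   i.e. the return address is popped into %ecx, the oversized argument
   area is released with an explicit add, and control returns through an
   indirect jump.  */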
11699 if (crtl->args.pops_args >= 65536)
11701 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11702 rtx insn;
11704 /* There is no "pascal" calling convention in any 64bit ABI. */
11705 gcc_assert (!TARGET_64BIT);
11707 insn = emit_insn (gen_pop (ecx));
11708 m->fs.cfa_offset -= UNITS_PER_WORD;
11709 m->fs.sp_offset -= UNITS_PER_WORD;
11711 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11712 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11713 add_reg_note (insn, REG_CFA_REGISTER,
11714 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11715 RTX_FRAME_RELATED_P (insn) = 1;
11717 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11718 popc, -1, true);
11719 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11721 else
11722 emit_jump_insn (gen_simple_return_pop_internal (popc));
11724 else
11725 emit_jump_insn (gen_simple_return_internal ());
11727 /* Restore the state back to the state from the prologue,
11728 so that it's correct for the next epilogue. */
11729 m->fs = frame_state_save;
11732 /* Reset from the function's potential modifications. */
11734 static void
11735 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11736 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11738 if (pic_offset_table_rtx)
11739 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11740 #if TARGET_MACHO
11741 /* Mach-O doesn't support labels at the end of objects, so if
11742 it looks like we might want one, insert a NOP. */
11744 rtx insn = get_last_insn ();
11745 rtx deleted_debug_label = NULL_RTX;
11746 while (insn
11747 && NOTE_P (insn)
11748 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11750 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11751 notes only, instead set their CODE_LABEL_NUMBER to -1,
11752 otherwise there would be code generation differences
11753 between -g and -g0. */
11754 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11755 deleted_debug_label = insn;
11756 insn = PREV_INSN (insn);
11758 if (insn
11759 && (LABEL_P (insn)
11760 || (NOTE_P (insn)
11761 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11762 fputs ("\tnop\n", file);
11763 else if (deleted_debug_label)
11764 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11765 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11766 CODE_LABEL_NUMBER (insn) = -1;
11768 #endif
11772 /* Return a scratch register to use in the split stack prologue. The
11773 split stack prologue is used for -fsplit-stack. It is the first
11774 instructions in the function, even before the regular prologue.
11775 The scratch register can be any caller-saved register which is not
11776 used for parameters or for the static chain. */
11778 static unsigned int
11779 split_stack_prologue_scratch_regno (void)
11781 if (TARGET_64BIT)
11782 return R11_REG;
11783 else
11785 bool is_fastcall, is_thiscall;
11786 int regparm;
11788 is_fastcall = (lookup_attribute ("fastcall",
11789 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11790 != NULL);
11791 is_thiscall = (lookup_attribute ("thiscall",
11792 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11793 != NULL);
11794 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11796 if (is_fastcall)
11798 if (DECL_STATIC_CHAIN (cfun->decl))
11800 sorry ("-fsplit-stack does not support fastcall with "
11801 "nested function");
11802 return INVALID_REGNUM;
11804 return AX_REG;
11806 else if (is_thiscall)
11808 if (!DECL_STATIC_CHAIN (cfun->decl))
11809 return DX_REG;
11810 return AX_REG;
11812 else if (regparm < 3)
11814 if (!DECL_STATIC_CHAIN (cfun->decl))
11815 return CX_REG;
11816 else
11818 if (regparm >= 2)
11820 sorry ("-fsplit-stack does not support 2 register "
11821 "parameters for a nested function");
11822 return INVALID_REGNUM;
11824 return DX_REG;
11827 else
11829 /* FIXME: We could make this work by pushing a register
11830 around the addition and comparison. */
11831 sorry ("-fsplit-stack does not support 3 register parameters");
11832 return INVALID_REGNUM;
11837 /* A SYMBOL_REF for the function which allocates new stackspace for
11838 -fsplit-stack. */
11840 static GTY(()) rtx split_stack_fn;
11842 /* A SYMBOL_REF for the more stack function when using the large
11843 model. */
11845 static GTY(()) rtx split_stack_fn_large;
11847 /* Handle -fsplit-stack. These are the first instructions in the
11848 function, even before the regular prologue. */
11850 void
11851 ix86_expand_split_stack_prologue (void)
11853 struct ix86_frame frame;
11854 HOST_WIDE_INT allocate;
11855 unsigned HOST_WIDE_INT args_size;
11856 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11857 rtx scratch_reg = NULL_RTX;
11858 rtx varargs_label = NULL_RTX;
11859 rtx fn;
11861 gcc_assert (flag_split_stack && reload_completed);
11863 ix86_finalize_stack_realign_flags ();
11864 ix86_compute_frame_layout (&frame);
11865 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11867 /* This is the label we will branch to if we have enough stack
11868 space. We expect the basic block reordering pass to reverse this
11869 branch if optimizing, so that we branch in the unlikely case. */
11870 label = gen_label_rtx ();
11872 /* We need to compare the stack pointer minus the frame size with
11873 the stack boundary in the TCB. The stack boundary always gives
11874 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11875 can compare directly. Otherwise we need to do an addition. */
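/* On GNU/Linux targets this limit typically resolves to a thread-pointer
   relative load (%fs-relative in 64-bit mode, %gs-relative in 32-bit mode)
   of the split-stack boundary field kept in the TCB; the exact location is
   a property of how the target expands UNSPEC_STACK_CHECK, not of this
   function.  */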
11877 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11878 UNSPEC_STACK_CHECK);
11879 limit = gen_rtx_CONST (Pmode, limit);
11880 limit = gen_rtx_MEM (Pmode, limit);
11881 if (allocate < SPLIT_STACK_AVAILABLE)
11882 current = stack_pointer_rtx;
11883 else
11885 unsigned int scratch_regno;
11886 rtx offset;
11888 /* We need a scratch register to hold the stack pointer minus
11889 the required frame size. Since this is the very start of the
11890 function, the scratch register can be any caller-saved
11891 register which is not used for parameters. */
11892 offset = GEN_INT (- allocate);
11893 scratch_regno = split_stack_prologue_scratch_regno ();
11894 if (scratch_regno == INVALID_REGNUM)
11895 return;
11896 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11897 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11899 /* We don't use ix86_gen_add3 in this case because it will
11900 want to split to lea, but when not optimizing the insn
11901 will not be split after this point. */
11902 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11903 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11904 offset)));
11906 else
11908 emit_move_insn (scratch_reg, offset);
11909 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11910 stack_pointer_rtx));
11912 current = scratch_reg;
11915 ix86_expand_branch (GEU, current, limit, label);
11916 jump_insn = get_last_insn ();
11917 JUMP_LABEL (jump_insn) = label;
11919 /* Mark the jump as very likely to be taken. */
11920 add_int_reg_note (jump_insn, REG_BR_PROB,
11921 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11923 if (split_stack_fn == NULL_RTX)
11924 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11925 fn = split_stack_fn;
11927 /* Get more stack space. We pass in the desired stack space and the
11928 size of the arguments to copy to the new stack. In 32-bit mode
11929 we push the parameters; __morestack will return on a new stack
11930 anyhow. In 64-bit mode we pass the parameters in r10 and
11931 r11. */
11932 allocate_rtx = GEN_INT (allocate);
11933 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11934 call_fusage = NULL_RTX;
11935 if (TARGET_64BIT)
11937 rtx reg10, reg11;
11939 reg10 = gen_rtx_REG (Pmode, R10_REG);
11940 reg11 = gen_rtx_REG (Pmode, R11_REG);
11942 /* If this function uses a static chain, it will be in %r10.
11943 Preserve it across the call to __morestack. */
11944 if (DECL_STATIC_CHAIN (cfun->decl))
11946 rtx rax;
11948 rax = gen_rtx_REG (word_mode, AX_REG);
11949 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11950 use_reg (&call_fusage, rax);
11953 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11954 && !TARGET_PECOFF)
11956 HOST_WIDE_INT argval;
11958 gcc_assert (Pmode == DImode);
11959 /* When using the large model we need to load the address
11960 into a register, and we've run out of registers. So we
11961 switch to a different calling convention, and we call a
11962 different function: __morestack_large. We pass the
11963 argument size in the upper 32 bits of r10 and pass the
11964 frame size in the lower 32 bits. */
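/* Concretely, r10 ends up holding ALLOCATE in its low 32 bits and the
   argument size in its high 32 bits, i.e. (args_size << 32) + allocate
   (the shift below is written as two 16-bit shifts).  */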
11965 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11966 gcc_assert ((args_size & 0xffffffff) == args_size);
11968 if (split_stack_fn_large == NULL_RTX)
11969 split_stack_fn_large =
11970 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11972 if (ix86_cmodel == CM_LARGE_PIC)
11974 rtx label, x;
11976 label = gen_label_rtx ();
11977 emit_label (label);
11978 LABEL_PRESERVE_P (label) = 1;
11979 emit_insn (gen_set_rip_rex64 (reg10, label));
11980 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11981 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
11982 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11983 UNSPEC_GOT);
11984 x = gen_rtx_CONST (Pmode, x);
11985 emit_move_insn (reg11, x);
11986 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11987 x = gen_const_mem (Pmode, x);
11988 emit_move_insn (reg11, x);
11990 else
11991 emit_move_insn (reg11, split_stack_fn_large);
11993 fn = reg11;
11995 argval = ((args_size << 16) << 16) + allocate;
11996 emit_move_insn (reg10, GEN_INT (argval));
11998 else
12000 emit_move_insn (reg10, allocate_rtx);
12001 emit_move_insn (reg11, GEN_INT (args_size));
12002 use_reg (&call_fusage, reg11);
12005 use_reg (&call_fusage, reg10);
12007 else
12009 emit_insn (gen_push (GEN_INT (args_size)));
12010 emit_insn (gen_push (allocate_rtx));
12012 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12013 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12014 NULL_RTX, false);
12015 add_function_usage_to (call_insn, call_fusage);
12017 /* In order to make call/return prediction work right, we now need
12018 to execute a return instruction. See
12019 libgcc/config/i386/morestack.S for the details on how this works.
12021 For flow purposes gcc must not see this as a return
12022 instruction--we need control flow to continue at the subsequent
12023 label. Therefore, we use an unspec. */
12024 gcc_assert (crtl->args.pops_args < 65536);
12025 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12027 /* If we are in 64-bit mode and this function uses a static chain,
12028 we saved %r10 in %rax before calling __morestack. */
12029 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12030 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12031 gen_rtx_REG (word_mode, AX_REG));
12033 /* If this function calls va_start, we need to store a pointer to
12034 the arguments on the old stack, because they may not have been
12035 all copied to the new stack. At this point the old stack can be
12036 found at the frame pointer value used by __morestack, because
12037 __morestack has set that up before calling back to us. Here we
12038 store that pointer in a scratch register, and in
12039 ix86_expand_prologue we store the scratch register in a stack
12040 slot. */
12041 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12043 unsigned int scratch_regno;
12044 rtx frame_reg;
12045 int words;
12047 scratch_regno = split_stack_prologue_scratch_regno ();
12048 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12049 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12051 /* 64-bit:
12052 fp -> old fp value
12053 return address within this function
12054 return address of caller of this function
12055 stack arguments
12056 So we add three words to get to the stack arguments.
12058 32-bit:
12059 fp -> old fp value
12060 return address within this function
12061 first argument to __morestack
12062 second argument to __morestack
12063 return address of caller of this function
12064 stack arguments
12065 So we add five words to get to the stack arguments.
12067 words = TARGET_64BIT ? 3 : 5;
12068 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12069 gen_rtx_PLUS (Pmode, frame_reg,
12070 GEN_INT (words * UNITS_PER_WORD))));
12072 varargs_label = gen_label_rtx ();
12073 emit_jump_insn (gen_jump (varargs_label));
12074 JUMP_LABEL (get_last_insn ()) = varargs_label;
12076 emit_barrier ();
12079 emit_label (label);
12080 LABEL_NUSES (label) = 1;
12082 /* If this function calls va_start, we now have to set the scratch
12083 register for the case where we do not call __morestack. In this
12084 case we need to set it based on the stack pointer. */
12085 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12087 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12088 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12089 GEN_INT (UNITS_PER_WORD))));
12091 emit_label (varargs_label);
12092 LABEL_NUSES (varargs_label) = 1;
12096 /* We may have to tell the dataflow pass that the split stack prologue
12097 is initializing a scratch register. */
12099 static void
12100 ix86_live_on_entry (bitmap regs)
12102 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12104 gcc_assert (flag_split_stack);
12105 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12109 /* Extract the parts of an RTL expression that is a valid memory address
12110 for an instruction. Return 0 if the structure of the address is
12111 grossly off. Return -1 if the address contains ASHIFT, so it is not
12112 strictly valid, but it is still used for computing the length of an lea instruction. */
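/* Illustrative example: the address
	(plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 12))
   decomposes into base = A, index = B, scale = 4, disp = 12, which
   corresponds to the AT&T operand "12(%rA,%rB,4)".  */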
12115 ix86_decompose_address (rtx addr, struct ix86_address *out)
12117 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12118 rtx base_reg, index_reg;
12119 HOST_WIDE_INT scale = 1;
12120 rtx scale_rtx = NULL_RTX;
12121 rtx tmp;
12122 int retval = 1;
12123 enum ix86_address_seg seg = SEG_DEFAULT;
12125 /* Allow zero-extended SImode addresses,
12126 they will be emitted with addr32 prefix. */
12127 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12129 if (GET_CODE (addr) == ZERO_EXTEND
12130 && GET_MODE (XEXP (addr, 0)) == SImode)
12132 addr = XEXP (addr, 0);
12133 if (CONST_INT_P (addr))
12134 return 0;
12136 else if (GET_CODE (addr) == AND
12137 && const_32bit_mask (XEXP (addr, 1), DImode))
12139 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12140 if (addr == NULL_RTX)
12141 return 0;
12143 if (CONST_INT_P (addr))
12144 return 0;
12148 /* Allow SImode subregs of DImode addresses,
12149 they will be emitted with addr32 prefix. */
12150 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12152 if (GET_CODE (addr) == SUBREG
12153 && GET_MODE (SUBREG_REG (addr)) == DImode)
12155 addr = SUBREG_REG (addr);
12156 if (CONST_INT_P (addr))
12157 return 0;
12161 if (REG_P (addr))
12162 base = addr;
12163 else if (GET_CODE (addr) == SUBREG)
12165 if (REG_P (SUBREG_REG (addr)))
12166 base = addr;
12167 else
12168 return 0;
12170 else if (GET_CODE (addr) == PLUS)
12172 rtx addends[4], op;
12173 int n = 0, i;
12175 op = addr;
12178 if (n >= 4)
12179 return 0;
12180 addends[n++] = XEXP (op, 1);
12181 op = XEXP (op, 0);
12183 while (GET_CODE (op) == PLUS);
12184 if (n >= 4)
12185 return 0;
12186 addends[n] = op;
12188 for (i = n; i >= 0; --i)
12190 op = addends[i];
12191 switch (GET_CODE (op))
12193 case MULT:
12194 if (index)
12195 return 0;
12196 index = XEXP (op, 0);
12197 scale_rtx = XEXP (op, 1);
12198 break;
12200 case ASHIFT:
12201 if (index)
12202 return 0;
12203 index = XEXP (op, 0);
12204 tmp = XEXP (op, 1);
12205 if (!CONST_INT_P (tmp))
12206 return 0;
12207 scale = INTVAL (tmp);
12208 if ((unsigned HOST_WIDE_INT) scale > 3)
12209 return 0;
12210 scale = 1 << scale;
12211 break;
12213 case ZERO_EXTEND:
12214 op = XEXP (op, 0);
12215 if (GET_CODE (op) != UNSPEC)
12216 return 0;
12217 /* FALLTHRU */
12219 case UNSPEC:
12220 if (XINT (op, 1) == UNSPEC_TP
12221 && TARGET_TLS_DIRECT_SEG_REFS
12222 && seg == SEG_DEFAULT)
12223 seg = DEFAULT_TLS_SEG_REG;
12224 else
12225 return 0;
12226 break;
12228 case SUBREG:
12229 if (!REG_P (SUBREG_REG (op)))
12230 return 0;
12231 /* FALLTHRU */
12233 case REG:
12234 if (!base)
12235 base = op;
12236 else if (!index)
12237 index = op;
12238 else
12239 return 0;
12240 break;
12242 case CONST:
12243 case CONST_INT:
12244 case SYMBOL_REF:
12245 case LABEL_REF:
12246 if (disp)
12247 return 0;
12248 disp = op;
12249 break;
12251 default:
12252 return 0;
12256 else if (GET_CODE (addr) == MULT)
12258 index = XEXP (addr, 0); /* index*scale */
12259 scale_rtx = XEXP (addr, 1);
12261 else if (GET_CODE (addr) == ASHIFT)
12263 /* We're called for lea too, which implements ashift on occasion. */
12264 index = XEXP (addr, 0);
12265 tmp = XEXP (addr, 1);
12266 if (!CONST_INT_P (tmp))
12267 return 0;
12268 scale = INTVAL (tmp);
12269 if ((unsigned HOST_WIDE_INT) scale > 3)
12270 return 0;
12271 scale = 1 << scale;
12272 retval = -1;
12274 else
12275 disp = addr; /* displacement */
12277 if (index)
12279 if (REG_P (index))
12281 else if (GET_CODE (index) == SUBREG
12282 && REG_P (SUBREG_REG (index)))
12284 else
12285 return 0;
12288 /* Extract the integral value of scale. */
12289 if (scale_rtx)
12291 if (!CONST_INT_P (scale_rtx))
12292 return 0;
12293 scale = INTVAL (scale_rtx);
12296 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12297 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12299 /* Avoid useless 0 displacement. */
12300 if (disp == const0_rtx && (base || index))
12301 disp = NULL_RTX;
12303 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
12304 if (base_reg && index_reg && scale == 1
12305 && (index_reg == arg_pointer_rtx
12306 || index_reg == frame_pointer_rtx
12307 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12309 rtx tmp;
12310 tmp = base, base = index, index = tmp;
12311 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12314 /* Special case: %ebp cannot be encoded as a base without a displacement.
12315 Similarly %r13. */
12316 if (!disp
12317 && base_reg
12318 && (base_reg == hard_frame_pointer_rtx
12319 || base_reg == frame_pointer_rtx
12320 || base_reg == arg_pointer_rtx
12321 || (REG_P (base_reg)
12322 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12323 || REGNO (base_reg) == R13_REG))))
12324 disp = const0_rtx;
12326 /* Special case: on K6, [%esi] makes the instruction vector decoded.
12327 Avoid this by transforming to [%esi+0].
12328 Reload calls address legitimization without cfun defined, so we need
12329 to test cfun for being non-NULL. */
12330 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12331 && base_reg && !index_reg && !disp
12332 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12333 disp = const0_rtx;
12335 /* Special case: encode reg+reg instead of reg*2. */
12336 if (!base && index && scale == 2)
12337 base = index, base_reg = index_reg, scale = 1;
12339 /* Special case: scaling cannot be encoded without base or displacement. */
12340 if (!base && !disp && index && scale != 1)
12341 disp = const0_rtx;
12343 out->base = base;
12344 out->index = index;
12345 out->disp = disp;
12346 out->scale = scale;
12347 out->seg = seg;
12349 return retval;
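/* For illustration (AT&T syntax assumed): a canonical 32-bit address such as

     (plus:SI (plus:SI (mult:SI (reg:SI %ebx) (const_int 4))
                       (reg:SI %esi))
              (const_int 12))

   decomposes into base = %esi, index = %ebx, scale = 4, disp = 12 and
   seg = SEG_DEFAULT, i.e. the memory operand written "12(%esi,%ebx,4)".
   A well-formed address yields 1; the ASHIFT form of the scaling, which
   only lea accepts, is reported via the -1 return value.  */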
12352 /* Return cost of the memory address x.
12353 For i386, it is better to use a complex address than let gcc copy
12354 the address into a reg and make a new pseudo. But not if the address
12355 requires two regs - that would mean more pseudos with longer
12356 lifetimes. */
12357 static int
12358 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12359 addr_space_t as ATTRIBUTE_UNUSED,
12360 bool speed ATTRIBUTE_UNUSED)
12362 struct ix86_address parts;
12363 int cost = 1;
12364 int ok = ix86_decompose_address (x, &parts);
12366 gcc_assert (ok);
12368 if (parts.base && GET_CODE (parts.base) == SUBREG)
12369 parts.base = SUBREG_REG (parts.base);
12370 if (parts.index && GET_CODE (parts.index) == SUBREG)
12371 parts.index = SUBREG_REG (parts.index);
12373 /* Attempt to minimize number of registers in the address. */
12374 if ((parts.base
12375 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12376 || (parts.index
12377 && (!REG_P (parts.index)
12378 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12379 cost++;
12381 if (parts.base
12382 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12383 && parts.index
12384 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12385 && parts.base != parts.index)
12386 cost++;
12388 /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
12389 since its predecode logic can't detect the length of instructions
12390 and they degenerate to vector decoding. Increase the cost of such
12391 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12392 to split such addresses or even refuse such addresses at all.
12394 The following addressing modes are affected:
12395 [base+scale*index]
12396 [scale*index+disp]
12397 [base+index]
12399 The first and last cases may be avoidable by explicitly coding a zero
12400 displacement in the memory address, but I don't have an AMD-K6 machine
12401 handy to check this theory. */
12403 if (TARGET_K6
12404 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12405 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12406 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12407 cost += 10;
12409 return cost;
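/* By way of example under the cost model above: an address using only
   hard registers, such as "(%eax)" or "4(%esp)", keeps the base cost of
   1; an address still referencing one pseudo register before reload
   costs 2; one built from two distinct pseudos costs 3.  When tuning
   for K6 the vector-decode penalty of 10 is added on top of that for
   the affected forms.  */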
12412 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12413 this is used to form addresses to local data when -fPIC is in
12414 use. */
12416 static bool
12417 darwin_local_data_pic (rtx disp)
12419 return (GET_CODE (disp) == UNSPEC
12420 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12423 /* Determine if a given RTX is a valid constant. We already know this
12424 satisfies CONSTANT_P. */
12426 static bool
12427 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12429 switch (GET_CODE (x))
12431 case CONST:
12432 x = XEXP (x, 0);
12434 if (GET_CODE (x) == PLUS)
12436 if (!CONST_INT_P (XEXP (x, 1)))
12437 return false;
12438 x = XEXP (x, 0);
12441 if (TARGET_MACHO && darwin_local_data_pic (x))
12442 return true;
12444 /* Only some unspecs are valid as "constants". */
12445 if (GET_CODE (x) == UNSPEC)
12446 switch (XINT (x, 1))
12448 case UNSPEC_GOT:
12449 case UNSPEC_GOTOFF:
12450 case UNSPEC_PLTOFF:
12451 return TARGET_64BIT;
12452 case UNSPEC_TPOFF:
12453 case UNSPEC_NTPOFF:
12454 x = XVECEXP (x, 0, 0);
12455 return (GET_CODE (x) == SYMBOL_REF
12456 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12457 case UNSPEC_DTPOFF:
12458 x = XVECEXP (x, 0, 0);
12459 return (GET_CODE (x) == SYMBOL_REF
12460 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12461 default:
12462 return false;
12465 /* We must have drilled down to a symbol. */
12466 if (GET_CODE (x) == LABEL_REF)
12467 return true;
12468 if (GET_CODE (x) != SYMBOL_REF)
12469 return false;
12470 /* FALLTHRU */
12472 case SYMBOL_REF:
12473 /* TLS symbols are never valid. */
12474 if (SYMBOL_REF_TLS_MODEL (x))
12475 return false;
12477 /* DLLIMPORT symbols are never valid. */
12478 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12479 && SYMBOL_REF_DLLIMPORT_P (x))
12480 return false;
12482 #if TARGET_MACHO
12483 /* mdynamic-no-pic */
12484 if (MACHO_DYNAMIC_NO_PIC_P)
12485 return machopic_symbol_defined_p (x);
12486 #endif
12487 break;
12489 case CONST_DOUBLE:
12490 if (GET_MODE (x) == TImode
12491 && x != CONST0_RTX (TImode)
12492 && !TARGET_64BIT)
12493 return false;
12494 break;
12496 case CONST_VECTOR:
12497 if (!standard_sse_constant_p (x))
12498 return false;
12500 default:
12501 break;
12504 /* Otherwise we handle everything else in the move patterns. */
12505 return true;
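/* Two examples of the classification above: "&global_var + 4" (a CONST
   of a PLUS of a SYMBOL_REF and a CONST_INT) is a legitimate constant,
   while the address of a __thread variable is not, since its SYMBOL_REF
   carries a TLS model and must go through legitimize_tls_address
   instead.  Likewise a nonzero TImode CONST_DOUBLE is rejected on
   32-bit targets.  */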
12508 /* Determine if it's legal to put X into the constant pool. This
12509 is not possible for the address of thread-local symbols, which
12510 is checked above. */
12512 static bool
12513 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12515 /* We can always put integral constants and vectors in memory. */
12516 switch (GET_CODE (x))
12518 case CONST_INT:
12519 case CONST_DOUBLE:
12520 case CONST_VECTOR:
12521 return false;
12523 default:
12524 break;
12526 return !ix86_legitimate_constant_p (mode, x);
12529 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
12530 otherwise zero. */
12532 static bool
12533 is_imported_p (rtx x)
12535 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12536 || GET_CODE (x) != SYMBOL_REF)
12537 return false;
12539 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12543 /* Nonzero if the constant value X is a legitimate general operand
12544 when generating PIC code. It is given that flag_pic is on and
12545 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12547 bool
12548 legitimate_pic_operand_p (rtx x)
12550 rtx inner;
12552 switch (GET_CODE (x))
12554 case CONST:
12555 inner = XEXP (x, 0);
12556 if (GET_CODE (inner) == PLUS
12557 && CONST_INT_P (XEXP (inner, 1)))
12558 inner = XEXP (inner, 0);
12560 /* Only some unspecs are valid as "constants". */
12561 if (GET_CODE (inner) == UNSPEC)
12562 switch (XINT (inner, 1))
12564 case UNSPEC_GOT:
12565 case UNSPEC_GOTOFF:
12566 case UNSPEC_PLTOFF:
12567 return TARGET_64BIT;
12568 case UNSPEC_TPOFF:
12569 x = XVECEXP (inner, 0, 0);
12570 return (GET_CODE (x) == SYMBOL_REF
12571 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12572 case UNSPEC_MACHOPIC_OFFSET:
12573 return legitimate_pic_address_disp_p (x);
12574 default:
12575 return false;
12577 /* FALLTHRU */
12579 case SYMBOL_REF:
12580 case LABEL_REF:
12581 return legitimate_pic_address_disp_p (x);
12583 default:
12584 return true;
12588 /* Determine if a given CONST RTX is a valid memory displacement
12589 in PIC mode. */
12591 bool
12592 legitimate_pic_address_disp_p (rtx disp)
12594 bool saw_plus;
12596 /* In 64bit mode we can allow direct addresses of symbols and labels
12597 when they are not dynamic symbols. */
12598 if (TARGET_64BIT)
12600 rtx op0 = disp, op1;
12602 switch (GET_CODE (disp))
12604 case LABEL_REF:
12605 return true;
12607 case CONST:
12608 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12609 break;
12610 op0 = XEXP (XEXP (disp, 0), 0);
12611 op1 = XEXP (XEXP (disp, 0), 1);
12612 if (!CONST_INT_P (op1)
12613 || INTVAL (op1) >= 16*1024*1024
12614 || INTVAL (op1) < -16*1024*1024)
12615 break;
12616 if (GET_CODE (op0) == LABEL_REF)
12617 return true;
12618 if (GET_CODE (op0) == CONST
12619 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12620 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12621 return true;
12622 if (GET_CODE (op0) == UNSPEC
12623 && XINT (op0, 1) == UNSPEC_PCREL)
12624 return true;
12625 if (GET_CODE (op0) != SYMBOL_REF)
12626 break;
12627 /* FALLTHRU */
12629 case SYMBOL_REF:
12630 /* TLS references should always be enclosed in UNSPEC.
12631 The dllimported symbol always needs to be resolved. */
12632 if (SYMBOL_REF_TLS_MODEL (op0)
12633 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12634 return false;
12636 if (TARGET_PECOFF)
12638 if (is_imported_p (op0))
12639 return true;
12641 if (SYMBOL_REF_FAR_ADDR_P (op0)
12642 || !SYMBOL_REF_LOCAL_P (op0))
12643 break;
12645 /* Function symbols need to be resolved only for
12646 the large model.
12647 For the small model we don't need to resolve anything
12648 here. */
12649 if ((ix86_cmodel != CM_LARGE_PIC
12650 && SYMBOL_REF_FUNCTION_P (op0))
12651 || ix86_cmodel == CM_SMALL_PIC)
12652 return true;
12653 /* Non-external symbols don't need to be resolved for
12654 the large and medium models. */
12655 if ((ix86_cmodel == CM_LARGE_PIC
12656 || ix86_cmodel == CM_MEDIUM_PIC)
12657 && !SYMBOL_REF_EXTERNAL_P (op0))
12658 return true;
12660 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12661 && SYMBOL_REF_LOCAL_P (op0)
12662 && ix86_cmodel != CM_LARGE_PIC)
12663 return true;
12664 break;
12666 default:
12667 break;
12670 if (GET_CODE (disp) != CONST)
12671 return false;
12672 disp = XEXP (disp, 0);
12674 if (TARGET_64BIT)
12676 /* It is not safe to allow PLUS expressions here; this would limit the
12677 allowed distance of GOT tables. We should not need these anyway. */
12678 if (GET_CODE (disp) != UNSPEC
12679 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12680 && XINT (disp, 1) != UNSPEC_GOTOFF
12681 && XINT (disp, 1) != UNSPEC_PCREL
12682 && XINT (disp, 1) != UNSPEC_PLTOFF))
12683 return false;
12685 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12686 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12687 return false;
12688 return true;
12691 saw_plus = false;
12692 if (GET_CODE (disp) == PLUS)
12694 if (!CONST_INT_P (XEXP (disp, 1)))
12695 return false;
12696 disp = XEXP (disp, 0);
12697 saw_plus = true;
12700 if (TARGET_MACHO && darwin_local_data_pic (disp))
12701 return true;
12703 if (GET_CODE (disp) != UNSPEC)
12704 return false;
12706 switch (XINT (disp, 1))
12708 case UNSPEC_GOT:
12709 if (saw_plus)
12710 return false;
12711 /* We need to check for both symbols and labels because VxWorks loads
12712 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12713 details. */
12714 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12715 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12716 case UNSPEC_GOTOFF:
12717 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12718 While the ABI also specifies a 32bit relocation, we don't produce it in
12719 the small PIC model at all. */
12720 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12721 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12722 && !TARGET_64BIT)
12723 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12724 return false;
12725 case UNSPEC_GOTTPOFF:
12726 case UNSPEC_GOTNTPOFF:
12727 case UNSPEC_INDNTPOFF:
12728 if (saw_plus)
12729 return false;
12730 disp = XVECEXP (disp, 0, 0);
12731 return (GET_CODE (disp) == SYMBOL_REF
12732 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12733 case UNSPEC_NTPOFF:
12734 disp = XVECEXP (disp, 0, 0);
12735 return (GET_CODE (disp) == SYMBOL_REF
12736 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12737 case UNSPEC_DTPOFF:
12738 disp = XVECEXP (disp, 0, 0);
12739 return (GET_CODE (disp) == SYMBOL_REF
12740 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12743 return false;
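/* Typical displacements accepted above, in assembler notation:
   "foo@GOTOFF" and "foo@GOT" for 32-bit PIC, "foo@GOTPCREL"-style
   unspecs in 64-bit mode, and the TLS forms "foo@gottpoff",
   "foo@ntpoff" and "foo@dtpoff" when the referenced symbol carries the
   matching TLS model.  A plain "sym + const" without an unspec wrapper
   is accepted only in 64-bit mode, for local non-far symbols with an
   offset smaller than 16MB in absolute value.  */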
12746 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns true if the
12747 invalid parts of address X were pushed as reloads, in which case the
12748 calling macro should goto WIN; returns false if X was left untouched
12749 and no reloads were pushed. */
12751 bool
12752 ix86_legitimize_reload_address (rtx x,
12753 enum machine_mode mode ATTRIBUTE_UNUSED,
12754 int opnum, int type,
12755 int ind_levels ATTRIBUTE_UNUSED)
12757 /* Reload can generate:
12759 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12760 (reg:DI 97))
12761 (reg:DI 2 cx))
12763 This RTX is rejected from ix86_legitimate_address_p due to
12764 non-strictness of base register 97. Following this rejection,
12765 reload pushes all three components into separate registers,
12766 creating invalid memory address RTX.
12768 Following code reloads only the invalid part of the
12769 memory address RTX. */
12771 if (GET_CODE (x) == PLUS
12772 && REG_P (XEXP (x, 1))
12773 && GET_CODE (XEXP (x, 0)) == PLUS
12774 && REG_P (XEXP (XEXP (x, 0), 1)))
12776 rtx base, index;
12777 bool something_reloaded = false;
12779 base = XEXP (XEXP (x, 0), 1);
12780 if (!REG_OK_FOR_BASE_STRICT_P (base))
12782 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12783 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12784 opnum, (enum reload_type) type);
12785 something_reloaded = true;
12788 index = XEXP (x, 1);
12789 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12791 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12792 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12793 opnum, (enum reload_type) type);
12794 something_reloaded = true;
12797 gcc_assert (something_reloaded);
12798 return true;
12801 return false;
12804 /* Determine if op is a suitable RTX for an address register.
12805 Return naked register if a register or a register subreg is
12806 found, otherwise return NULL_RTX. */
12808 static rtx
12809 ix86_validate_address_register (rtx op)
12811 enum machine_mode mode = GET_MODE (op);
12813 /* Only SImode or DImode registers can form the address. */
12814 if (mode != SImode && mode != DImode)
12815 return NULL_RTX;
12817 if (REG_P (op))
12818 return op;
12819 else if (GET_CODE (op) == SUBREG)
12821 rtx reg = SUBREG_REG (op);
12823 if (!REG_P (reg))
12824 return NULL_RTX;
12826 mode = GET_MODE (reg);
12828 /* Don't allow SUBREGs that span more than a word. It can
12829 lead to spill failures when the register is one word out
12830 of a two word structure. */
12831 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12832 return NULL_RTX;
12834 /* Allow only SUBREGs of non-eliminable hard registers. */
12835 if (register_no_elim_operand (reg, mode))
12836 return reg;
12839 /* Op is not a register. */
12840 return NULL_RTX;
12843 /* Recognizes RTL expressions that are valid memory addresses for an
12844 instruction. The MODE argument is the machine mode for the MEM
12845 expression that wants to use this address.
12847 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12848 convert common non-canonical forms to canonical form so that they will
12849 be recognized. */
12851 static bool
12852 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12853 rtx addr, bool strict)
12855 struct ix86_address parts;
12856 rtx base, index, disp;
12857 HOST_WIDE_INT scale;
12858 enum ix86_address_seg seg;
12860 if (ix86_decompose_address (addr, &parts) <= 0)
12861 /* Decomposition failed. */
12862 return false;
12864 base = parts.base;
12865 index = parts.index;
12866 disp = parts.disp;
12867 scale = parts.scale;
12868 seg = parts.seg;
12870 /* Validate base register. */
12871 if (base)
12873 rtx reg = ix86_validate_address_register (base);
12875 if (reg == NULL_RTX)
12876 return false;
12878 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12879 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12880 /* Base is not valid. */
12881 return false;
12884 /* Validate index register. */
12885 if (index)
12887 rtx reg = ix86_validate_address_register (index);
12889 if (reg == NULL_RTX)
12890 return false;
12892 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12893 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12894 /* Index is not valid. */
12895 return false;
12898 /* Index and base should have the same mode. */
12899 if (base && index
12900 && GET_MODE (base) != GET_MODE (index))
12901 return false;
12903 /* Address override works only on the (%reg) part of %fs:(%reg). */
12904 if (seg != SEG_DEFAULT
12905 && ((base && GET_MODE (base) != word_mode)
12906 || (index && GET_MODE (index) != word_mode)))
12907 return false;
12909 /* Validate scale factor. */
12910 if (scale != 1)
12912 if (!index)
12913 /* Scale without index. */
12914 return false;
12916 if (scale != 2 && scale != 4 && scale != 8)
12917 /* Scale is not a valid multiplier. */
12918 return false;
12921 /* Validate displacement. */
12922 if (disp)
12924 if (GET_CODE (disp) == CONST
12925 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12926 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12927 switch (XINT (XEXP (disp, 0), 1))
12929 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12930 used. While the ABI also specifies 32bit relocations, we don't produce
12931 them at all and use IP-relative addressing instead. */
12932 case UNSPEC_GOT:
12933 case UNSPEC_GOTOFF:
12934 gcc_assert (flag_pic);
12935 if (!TARGET_64BIT)
12936 goto is_legitimate_pic;
12938 /* 64bit address unspec. */
12939 return false;
12941 case UNSPEC_GOTPCREL:
12942 case UNSPEC_PCREL:
12943 gcc_assert (flag_pic);
12944 goto is_legitimate_pic;
12946 case UNSPEC_GOTTPOFF:
12947 case UNSPEC_GOTNTPOFF:
12948 case UNSPEC_INDNTPOFF:
12949 case UNSPEC_NTPOFF:
12950 case UNSPEC_DTPOFF:
12951 break;
12953 case UNSPEC_STACK_CHECK:
12954 gcc_assert (flag_split_stack);
12955 break;
12957 default:
12958 /* Invalid address unspec. */
12959 return false;
12962 else if (SYMBOLIC_CONST (disp)
12963 && (flag_pic
12964 || (TARGET_MACHO
12965 #if TARGET_MACHO
12966 && MACHOPIC_INDIRECT
12967 && !machopic_operand_p (disp)
12968 #endif
12972 is_legitimate_pic:
12973 if (TARGET_64BIT && (index || base))
12975 /* foo@dtpoff(%rX) is ok. */
12976 if (GET_CODE (disp) != CONST
12977 || GET_CODE (XEXP (disp, 0)) != PLUS
12978 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12979 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12980 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12981 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12982 /* Non-constant pic memory reference. */
12983 return false;
12985 else if ((!TARGET_MACHO || flag_pic)
12986 && ! legitimate_pic_address_disp_p (disp))
12987 /* Displacement is an invalid pic construct. */
12988 return false;
12989 #if TARGET_MACHO
12990 else if (MACHO_DYNAMIC_NO_PIC_P
12991 && !ix86_legitimate_constant_p (Pmode, disp))
12992 /* Displacement must be referenced via non_lazy_pointer. */
12993 return false;
12994 #endif
12996 /* This code used to verify that a symbolic pic displacement
12997 includes the pic_offset_table_rtx register.
12999 While this is a good idea, unfortunately these constructs may
13000 be created by "adds using lea" optimization for incorrect
13001 code like:
13003 int a;
13004 int foo(int i)
13006 return *(&a+i);
13009 This code is nonsensical, but results in addressing the
13010 GOT table with a pic_offset_table_rtx base. We can't
13011 easily refuse it, since it gets matched by the
13012 "addsi3" pattern, which later gets split to lea when the
13013 output register differs from the input. While this
13014 could be handled by a separate addsi pattern for this case
13015 that never results in lea, disabling this test seems to be
13016 the easier and correct fix for the crash. */
13018 else if (GET_CODE (disp) != LABEL_REF
13019 && !CONST_INT_P (disp)
13020 && (GET_CODE (disp) != CONST
13021 || !ix86_legitimate_constant_p (Pmode, disp))
13022 && (GET_CODE (disp) != SYMBOL_REF
13023 || !ix86_legitimate_constant_p (Pmode, disp)))
13024 /* Displacement is not constant. */
13025 return false;
13026 else if (TARGET_64BIT
13027 && !x86_64_immediate_operand (disp, VOIDmode))
13028 /* Displacement is out of range. */
13029 return false;
13030 /* In x32 mode, constant addresses are sign extended to 64bit, so
13031 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13032 else if (TARGET_X32 && !(index || base)
13033 && CONST_INT_P (disp)
13034 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13035 return false;
13038 /* Everything looks valid. */
13039 return true;
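/* Some concrete consequences of the checks above: "4(%eax,%ebx,8)" is
   legitimate, "(%eax,%ebx,3)" is not since 3 is not an encodable scale,
   a scaled index with neither base nor displacement is given a zero
   displacement by ix86_decompose_address so it can be encoded, and in
   x32 mode a bare constant address in the 0x80000000..0xffffffff range
   is rejected because it would be sign-extended to 64 bits.  */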
13042 /* Determine if a given RTX is a valid constant address. */
13044 bool
13045 constant_address_p (rtx x)
13047 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13050 /* Return a unique alias set for the GOT. */
13052 static alias_set_type
13053 ix86_GOT_alias_set (void)
13055 static alias_set_type set = -1;
13056 if (set == -1)
13057 set = new_alias_set ();
13058 return set;
13061 /* Return a legitimate reference for ORIG (an address) using the
13062 register REG. If REG is 0, a new pseudo is generated.
13064 There are two types of references that must be handled:
13066 1. Global data references must load the address from the GOT, via
13067 the PIC reg. An insn is emitted to do this load, and the reg is
13068 returned.
13070 2. Static data references, constant pool addresses, and code labels
13071 compute the address as an offset from the GOT, whose base is in
13072 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13073 differentiate them from global data objects. The returned
13074 address is the PIC reg + an unspec constant.
13076 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13077 reg also appears in the address. */
13079 static rtx
13080 legitimize_pic_address (rtx orig, rtx reg)
13082 rtx addr = orig;
13083 rtx new_rtx = orig;
13085 #if TARGET_MACHO
13086 if (TARGET_MACHO && !TARGET_64BIT)
13088 if (reg == 0)
13089 reg = gen_reg_rtx (Pmode);
13090 /* Use the generic Mach-O PIC machinery. */
13091 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13093 #endif
13095 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13097 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13098 if (tmp)
13099 return tmp;
13102 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13103 new_rtx = addr;
13104 else if (TARGET_64BIT && !TARGET_PECOFF
13105 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13107 rtx tmpreg;
13108 /* This symbol may be referenced via a displacement from the PIC
13109 base address (@GOTOFF). */
13111 if (reload_in_progress)
13112 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13113 if (GET_CODE (addr) == CONST)
13114 addr = XEXP (addr, 0);
13115 if (GET_CODE (addr) == PLUS)
13117 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13118 UNSPEC_GOTOFF);
13119 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13121 else
13122 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13123 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13124 if (!reg)
13125 tmpreg = gen_reg_rtx (Pmode);
13126 else
13127 tmpreg = reg;
13128 emit_move_insn (tmpreg, new_rtx);
13130 if (reg != 0)
13132 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13133 tmpreg, 1, OPTAB_DIRECT);
13134 new_rtx = reg;
13136 else
13137 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13139 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13141 /* This symbol may be referenced via a displacement from the PIC
13142 base address (@GOTOFF). */
13144 if (reload_in_progress)
13145 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13146 if (GET_CODE (addr) == CONST)
13147 addr = XEXP (addr, 0);
13148 if (GET_CODE (addr) == PLUS)
13150 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13151 UNSPEC_GOTOFF);
13152 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13154 else
13155 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13156 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13157 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13159 if (reg != 0)
13161 emit_move_insn (reg, new_rtx);
13162 new_rtx = reg;
13165 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13166 /* We can't use @GOTOFF for text labels on VxWorks;
13167 see gotoff_operand. */
13168 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13170 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13171 if (tmp)
13172 return tmp;
13174 /* For x64 PE-COFF there is no GOT table, so we use the address
13175 directly. */
13176 if (TARGET_64BIT && TARGET_PECOFF)
13178 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13179 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13181 if (reg == 0)
13182 reg = gen_reg_rtx (Pmode);
13183 emit_move_insn (reg, new_rtx);
13184 new_rtx = reg;
13186 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13188 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13189 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13190 new_rtx = gen_const_mem (Pmode, new_rtx);
13191 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13193 if (reg == 0)
13194 reg = gen_reg_rtx (Pmode);
13195 /* Use gen_movsi directly, otherwise the address is loaded
13196 into a register for CSE. We don't want to CSE these addresses;
13197 instead we CSE addresses from the GOT table, so skip this. */
13198 emit_insn (gen_movsi (reg, new_rtx));
13199 new_rtx = reg;
13201 else
13203 /* This symbol must be referenced via a load from the
13204 Global Offset Table (@GOT). */
13206 if (reload_in_progress)
13207 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13208 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13209 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13210 if (TARGET_64BIT)
13211 new_rtx = force_reg (Pmode, new_rtx);
13212 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13213 new_rtx = gen_const_mem (Pmode, new_rtx);
13214 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13216 if (reg == 0)
13217 reg = gen_reg_rtx (Pmode);
13218 emit_move_insn (reg, new_rtx);
13219 new_rtx = reg;
13222 else
13224 if (CONST_INT_P (addr)
13225 && !x86_64_immediate_operand (addr, VOIDmode))
13227 if (reg)
13229 emit_move_insn (reg, addr);
13230 new_rtx = reg;
13232 else
13233 new_rtx = force_reg (Pmode, addr);
13235 else if (GET_CODE (addr) == CONST)
13237 addr = XEXP (addr, 0);
13239 /* We must match stuff we generate before. Assume the only
13240 unspecs that can get here are ours. Not that we could do
13241 anything with them anyway.... */
13242 if (GET_CODE (addr) == UNSPEC
13243 || (GET_CODE (addr) == PLUS
13244 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13245 return orig;
13246 gcc_assert (GET_CODE (addr) == PLUS);
13248 if (GET_CODE (addr) == PLUS)
13250 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13252 /* Check first to see if this is a constant offset from a @GOTOFF
13253 symbol reference. */
13254 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13255 && CONST_INT_P (op1))
13257 if (!TARGET_64BIT)
13259 if (reload_in_progress)
13260 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13261 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13262 UNSPEC_GOTOFF);
13263 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13264 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13265 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13267 if (reg != 0)
13269 emit_move_insn (reg, new_rtx);
13270 new_rtx = reg;
13273 else
13275 if (INTVAL (op1) < -16*1024*1024
13276 || INTVAL (op1) >= 16*1024*1024)
13278 if (!x86_64_immediate_operand (op1, Pmode))
13279 op1 = force_reg (Pmode, op1);
13280 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13284 else
13286 rtx base = legitimize_pic_address (op0, reg);
13287 enum machine_mode mode = GET_MODE (base);
13288 new_rtx
13289 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13291 if (CONST_INT_P (new_rtx))
13293 if (INTVAL (new_rtx) < -16*1024*1024
13294 || INTVAL (new_rtx) >= 16*1024*1024)
13296 if (!x86_64_immediate_operand (new_rtx, mode))
13297 new_rtx = force_reg (mode, new_rtx);
13298 new_rtx
13299 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13301 else
13302 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13304 else
13306 if (GET_CODE (new_rtx) == PLUS
13307 && CONSTANT_P (XEXP (new_rtx, 1)))
13309 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13310 new_rtx = XEXP (new_rtx, 1);
13312 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13317 return new_rtx;
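/* Roughly, on a GNU/Linux target the transformations above turn a local
   symbol under 32-bit PIC into
     (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOTOFF)))
   i.e. "sym@GOTOFF(%ebx)" with %ebx as the typical PIC register, a
   global symbol under 32-bit PIC into a load from "sym@GOT(%ebx)", and
   a global symbol under 64-bit small-model PIC into a load from
   "sym@GOTPCREL(%rip)".  */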
13320 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13322 static rtx
13323 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13325 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13327 if (GET_MODE (tp) != tp_mode)
13329 gcc_assert (GET_MODE (tp) == SImode);
13330 gcc_assert (tp_mode == DImode);
13332 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13335 if (to_reg)
13336 tp = copy_to_mode_reg (tp_mode, tp);
13338 return tp;
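/* The UNSPEC_TP expression built here ends up as a segment-relative
   reference; on typical GNU/Linux configurations that is the %gs
   segment base in 32-bit code and the %fs segment base in 64-bit code
   (see DEFAULT_TLS_SEG_REG).  */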
13341 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13343 static GTY(()) rtx ix86_tls_symbol;
13345 static rtx
13346 ix86_tls_get_addr (void)
13348 if (!ix86_tls_symbol)
13350 const char *sym
13351 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13352 ? "___tls_get_addr" : "__tls_get_addr");
13354 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13357 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13359 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13360 UNSPEC_PLTOFF);
13361 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13362 gen_rtx_CONST (Pmode, unspec));
13365 return ix86_tls_symbol;
13368 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13370 static GTY(()) rtx ix86_tls_module_base_symbol;
13373 ix86_tls_module_base (void)
13375 if (!ix86_tls_module_base_symbol)
13377 ix86_tls_module_base_symbol
13378 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13380 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13381 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13384 return ix86_tls_module_base_symbol;
13387 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13388 false if we expect this to be used for a memory address and true if
13389 we expect to load the address into a register. */
13391 static rtx
13392 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13394 rtx dest, base, off;
13395 rtx pic = NULL_RTX, tp = NULL_RTX;
13396 enum machine_mode tp_mode = Pmode;
13397 int type;
13399 switch (model)
13401 case TLS_MODEL_GLOBAL_DYNAMIC:
13402 dest = gen_reg_rtx (Pmode);
13404 if (!TARGET_64BIT)
13406 if (flag_pic && !TARGET_PECOFF)
13407 pic = pic_offset_table_rtx;
13408 else
13410 pic = gen_reg_rtx (Pmode);
13411 emit_insn (gen_set_got (pic));
13415 if (TARGET_GNU2_TLS)
13417 if (TARGET_64BIT)
13418 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13419 else
13420 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13422 tp = get_thread_pointer (Pmode, true);
13423 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13425 if (GET_MODE (x) != Pmode)
13426 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13428 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13430 else
13432 rtx caddr = ix86_tls_get_addr ();
13434 if (TARGET_64BIT)
13436 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13437 rtx insns;
13439 start_sequence ();
13440 emit_call_insn
13441 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13442 insns = get_insns ();
13443 end_sequence ();
13445 if (GET_MODE (x) != Pmode)
13446 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13448 RTL_CONST_CALL_P (insns) = 1;
13449 emit_libcall_block (insns, dest, rax, x);
13451 else
13452 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13454 break;
13456 case TLS_MODEL_LOCAL_DYNAMIC:
13457 base = gen_reg_rtx (Pmode);
13459 if (!TARGET_64BIT)
13461 if (flag_pic)
13462 pic = pic_offset_table_rtx;
13463 else
13465 pic = gen_reg_rtx (Pmode);
13466 emit_insn (gen_set_got (pic));
13470 if (TARGET_GNU2_TLS)
13472 rtx tmp = ix86_tls_module_base ();
13474 if (TARGET_64BIT)
13475 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13476 else
13477 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13479 tp = get_thread_pointer (Pmode, true);
13480 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13481 gen_rtx_MINUS (Pmode, tmp, tp));
13483 else
13485 rtx caddr = ix86_tls_get_addr ();
13487 if (TARGET_64BIT)
13489 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13490 rtx insns, eqv;
13492 start_sequence ();
13493 emit_call_insn
13494 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13495 insns = get_insns ();
13496 end_sequence ();
13498 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13499 share the LD_BASE result with other LD model accesses. */
13500 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13501 UNSPEC_TLS_LD_BASE);
13503 RTL_CONST_CALL_P (insns) = 1;
13504 emit_libcall_block (insns, base, rax, eqv);
13506 else
13507 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13510 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13511 off = gen_rtx_CONST (Pmode, off);
13513 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13515 if (TARGET_GNU2_TLS)
13517 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13519 if (GET_MODE (x) != Pmode)
13520 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13522 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13524 break;
13526 case TLS_MODEL_INITIAL_EXEC:
13527 if (TARGET_64BIT)
13529 if (TARGET_SUN_TLS && !TARGET_X32)
13531 /* The Sun linker took the AMD64 TLS spec literally
13532 and can only handle %rax as destination of the
13533 initial executable code sequence. */
13535 dest = gen_reg_rtx (DImode);
13536 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13537 return dest;
13540 /* Generate DImode references to avoid %fs:(%reg32)
13541 problems and the linker IE->LE relaxation bug. */
13542 tp_mode = DImode;
13543 pic = NULL;
13544 type = UNSPEC_GOTNTPOFF;
13546 else if (flag_pic)
13548 if (reload_in_progress)
13549 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13550 pic = pic_offset_table_rtx;
13551 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13553 else if (!TARGET_ANY_GNU_TLS)
13555 pic = gen_reg_rtx (Pmode);
13556 emit_insn (gen_set_got (pic));
13557 type = UNSPEC_GOTTPOFF;
13559 else
13561 pic = NULL;
13562 type = UNSPEC_INDNTPOFF;
13565 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13566 off = gen_rtx_CONST (tp_mode, off);
13567 if (pic)
13568 off = gen_rtx_PLUS (tp_mode, pic, off);
13569 off = gen_const_mem (tp_mode, off);
13570 set_mem_alias_set (off, ix86_GOT_alias_set ());
13572 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13574 base = get_thread_pointer (tp_mode,
13575 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13576 off = force_reg (tp_mode, off);
13577 return gen_rtx_PLUS (tp_mode, base, off);
13579 else
13581 base = get_thread_pointer (Pmode, true);
13582 dest = gen_reg_rtx (Pmode);
13583 emit_insn (ix86_gen_sub3 (dest, base, off));
13585 break;
13587 case TLS_MODEL_LOCAL_EXEC:
13588 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13589 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13590 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13591 off = gen_rtx_CONST (Pmode, off);
13593 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13595 base = get_thread_pointer (Pmode,
13596 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13597 return gen_rtx_PLUS (Pmode, base, off);
13599 else
13601 base = get_thread_pointer (Pmode, true);
13602 dest = gen_reg_rtx (Pmode);
13603 emit_insn (ix86_gen_sub3 (dest, base, off));
13605 break;
13607 default:
13608 gcc_unreachable ();
13611 return dest;
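/* As a rough guide to the code generated by the models above on a
   64-bit GNU/Linux target: local-exec accesses become
   "%fs:x@tpoff"-style references, initial-exec loads the offset from
   "x@gottpoff(%rip)" and adds it to the thread pointer, and the
   global- and local-dynamic models call __tls_get_addr (or use the
   GNU2 descriptor sequence when TARGET_GNU2_TLS).  */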
13614 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13615 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13616 unique refptr-DECL symbol corresponding to symbol DECL. */
13618 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13619 htab_t dllimport_map;
13621 static tree
13622 get_dllimport_decl (tree decl, bool beimport)
13624 struct tree_map *h, in;
13625 void **loc;
13626 const char *name;
13627 const char *prefix;
13628 size_t namelen, prefixlen;
13629 char *imp_name;
13630 tree to;
13631 rtx rtl;
13633 if (!dllimport_map)
13634 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13636 in.hash = htab_hash_pointer (decl);
13637 in.base.from = decl;
13638 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13639 h = (struct tree_map *) *loc;
13640 if (h)
13641 return h->to;
13643 *loc = h = ggc_alloc_tree_map ();
13644 h->hash = in.hash;
13645 h->base.from = decl;
13646 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13647 VAR_DECL, NULL, ptr_type_node);
13648 DECL_ARTIFICIAL (to) = 1;
13649 DECL_IGNORED_P (to) = 1;
13650 DECL_EXTERNAL (to) = 1;
13651 TREE_READONLY (to) = 1;
13653 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13654 name = targetm.strip_name_encoding (name);
13655 if (beimport)
13656 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13657 ? "*__imp_" : "*__imp__";
13658 else
13659 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13660 namelen = strlen (name);
13661 prefixlen = strlen (prefix);
13662 imp_name = (char *) alloca (namelen + prefixlen + 1);
13663 memcpy (imp_name, prefix, prefixlen);
13664 memcpy (imp_name + prefixlen, name, namelen + 1);
13666 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13667 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13668 SET_SYMBOL_REF_DECL (rtl, to);
13669 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13670 if (!beimport)
13672 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13673 #ifdef SUB_TARGET_RECORD_STUB
13674 SUB_TARGET_RECORD_STUB (name);
13675 #endif
13678 rtl = gen_const_mem (Pmode, rtl);
13679 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13681 SET_DECL_RTL (to, rtl);
13682 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13684 return to;
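/* For example, a dllimport reference to "foo" is rewritten into a load
   through the synthetic symbol "__imp_foo" (or "__imp__foo" when a user
   label prefix is in use), while the !BEIMPORT path creates a
   ".refptr.foo"-style indirection for far-away externals.  */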
13687 /* Expand SYMBOL into its corresponding far-address symbol.
13688 WANT_REG is true if we require the result to be a register. */
13690 static rtx
13691 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13693 tree imp_decl;
13694 rtx x;
13696 gcc_assert (SYMBOL_REF_DECL (symbol));
13697 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13699 x = DECL_RTL (imp_decl);
13700 if (want_reg)
13701 x = force_reg (Pmode, x);
13702 return x;
13705 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13706 true if we require the result to be a register. */
13708 static rtx
13709 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13711 tree imp_decl;
13712 rtx x;
13714 gcc_assert (SYMBOL_REF_DECL (symbol));
13715 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13717 x = DECL_RTL (imp_decl);
13718 if (want_reg)
13719 x = force_reg (Pmode, x);
13720 return x;
13723 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13724 is true if we require the result to be a register. */
13726 static rtx
13727 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13729 if (!TARGET_PECOFF)
13730 return NULL_RTX;
13732 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13734 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13735 return legitimize_dllimport_symbol (addr, inreg);
13736 if (GET_CODE (addr) == CONST
13737 && GET_CODE (XEXP (addr, 0)) == PLUS
13738 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13739 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13741 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13742 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13746 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13747 return NULL_RTX;
13748 if (GET_CODE (addr) == SYMBOL_REF
13749 && !is_imported_p (addr)
13750 && SYMBOL_REF_EXTERNAL_P (addr)
13751 && SYMBOL_REF_DECL (addr))
13752 return legitimize_pe_coff_extern_decl (addr, inreg);
13754 if (GET_CODE (addr) == CONST
13755 && GET_CODE (XEXP (addr, 0)) == PLUS
13756 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13757 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13758 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13759 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13761 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13762 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13764 return NULL_RTX;
13767 /* Try machine-dependent ways of modifying an illegitimate address
13768 to be legitimate. If we find one, return the new, valid address.
13769 This macro is used in only one place: `memory_address' in explow.c.
13771 OLDX is the address as it was before break_out_memory_refs was called.
13772 In some cases it is useful to look at this to decide what needs to be done.
13774 It is always safe for this macro to do nothing. It exists to recognize
13775 opportunities to optimize the output.
13777 For the 80386, we handle X+REG by loading X into a register R and
13778 using R+REG. R will go in a general reg and indexing will be used.
13779 However, if REG is a broken-out memory address or multiplication,
13780 nothing needs to be done because REG can certainly go in a general reg.
13782 When -fpic is used, special handling is needed for symbolic references.
13783 See comments by legitimize_pic_address in i386.c for details. */
13785 static rtx
13786 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13787 enum machine_mode mode)
13789 int changed = 0;
13790 unsigned log;
13792 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13793 if (log)
13794 return legitimize_tls_address (x, (enum tls_model) log, false);
13795 if (GET_CODE (x) == CONST
13796 && GET_CODE (XEXP (x, 0)) == PLUS
13797 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13798 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13800 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13801 (enum tls_model) log, false);
13802 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13805 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13807 rtx tmp = legitimize_pe_coff_symbol (x, true);
13808 if (tmp)
13809 return tmp;
13812 if (flag_pic && SYMBOLIC_CONST (x))
13813 return legitimize_pic_address (x, 0);
13815 #if TARGET_MACHO
13816 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13817 return machopic_indirect_data_reference (x, 0);
13818 #endif
13820 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13821 if (GET_CODE (x) == ASHIFT
13822 && CONST_INT_P (XEXP (x, 1))
13823 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13825 changed = 1;
13826 log = INTVAL (XEXP (x, 1));
13827 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13828 GEN_INT (1 << log));
13831 if (GET_CODE (x) == PLUS)
13833 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13835 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13836 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13837 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13839 changed = 1;
13840 log = INTVAL (XEXP (XEXP (x, 0), 1));
13841 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13842 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13843 GEN_INT (1 << log));
13846 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13847 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13848 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13850 changed = 1;
13851 log = INTVAL (XEXP (XEXP (x, 1), 1));
13852 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13853 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13854 GEN_INT (1 << log));
13857 /* Put multiply first if it isn't already. */
13858 if (GET_CODE (XEXP (x, 1)) == MULT)
13860 rtx tmp = XEXP (x, 0);
13861 XEXP (x, 0) = XEXP (x, 1);
13862 XEXP (x, 1) = tmp;
13863 changed = 1;
13866 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13867 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13868 created by virtual register instantiation, register elimination, and
13869 similar optimizations. */
13870 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13872 changed = 1;
13873 x = gen_rtx_PLUS (Pmode,
13874 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13875 XEXP (XEXP (x, 1), 0)),
13876 XEXP (XEXP (x, 1), 1));
13879 /* Canonicalize
13880 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13881 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13882 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13883 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13884 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13885 && CONSTANT_P (XEXP (x, 1)))
13887 rtx constant;
13888 rtx other = NULL_RTX;
13890 if (CONST_INT_P (XEXP (x, 1)))
13892 constant = XEXP (x, 1);
13893 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13895 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13897 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13898 other = XEXP (x, 1);
13900 else
13901 constant = 0;
13903 if (constant)
13905 changed = 1;
13906 x = gen_rtx_PLUS (Pmode,
13907 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13908 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13909 plus_constant (Pmode, other,
13910 INTVAL (constant)));
13914 if (changed && ix86_legitimate_address_p (mode, x, false))
13915 return x;
13917 if (GET_CODE (XEXP (x, 0)) == MULT)
13919 changed = 1;
13920 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13923 if (GET_CODE (XEXP (x, 1)) == MULT)
13925 changed = 1;
13926 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13929 if (changed
13930 && REG_P (XEXP (x, 1))
13931 && REG_P (XEXP (x, 0)))
13932 return x;
13934 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13936 changed = 1;
13937 x = legitimize_pic_address (x, 0);
13940 if (changed && ix86_legitimate_address_p (mode, x, false))
13941 return x;
13943 if (REG_P (XEXP (x, 0)))
13945 rtx temp = gen_reg_rtx (Pmode);
13946 rtx val = force_operand (XEXP (x, 1), temp);
13947 if (val != temp)
13949 val = convert_to_mode (Pmode, val, 1);
13950 emit_move_insn (temp, val);
13953 XEXP (x, 1) = temp;
13954 return x;
13957 else if (REG_P (XEXP (x, 1)))
13959 rtx temp = gen_reg_rtx (Pmode);
13960 rtx val = force_operand (XEXP (x, 0), temp);
13961 if (val != temp)
13963 val = convert_to_mode (Pmode, val, 1);
13964 emit_move_insn (temp, val);
13967 XEXP (x, 0) = temp;
13968 return x;
13972 return x;
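/* The canonicalizations above, by example: a shift used for scaling,
   (ashift (reg) (const_int 2)), is rewritten as (mult (reg)
   (const_int 4)); within a PLUS the MULT operand is moved first; and
   sums of the shape (mult ...) + (reg + const) are re-associated so
   that the constant ends up as the displacement of a single lea-style
   address.  */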
13975 /* Print an integer constant expression in assembler syntax. Addition
13976 and subtraction are the only arithmetic that may appear in these
13977 expressions. FILE is the stdio stream to write to, X is the rtx, and
13978 CODE is the operand print code from the output string. */
13980 static void
13981 output_pic_addr_const (FILE *file, rtx x, int code)
13983 char buf[256];
13985 switch (GET_CODE (x))
13987 case PC:
13988 gcc_assert (flag_pic);
13989 putc ('.', file);
13990 break;
13992 case SYMBOL_REF:
13993 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13994 output_addr_const (file, x);
13995 else
13997 const char *name = XSTR (x, 0);
13999 /* Mark the decl as referenced so that cgraph will
14000 output the function. */
14001 if (SYMBOL_REF_DECL (x))
14002 mark_decl_referenced (SYMBOL_REF_DECL (x));
14004 #if TARGET_MACHO
14005 if (MACHOPIC_INDIRECT
14006 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14007 name = machopic_indirection_name (x, /*stub_p=*/true);
14008 #endif
14009 assemble_name (file, name);
14011 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14012 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14013 fputs ("@PLT", file);
14014 break;
14016 case LABEL_REF:
14017 x = XEXP (x, 0);
14018 /* FALLTHRU */
14019 case CODE_LABEL:
14020 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14021 assemble_name (asm_out_file, buf);
14022 break;
14024 case CONST_INT:
14025 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14026 break;
14028 case CONST:
14029 /* This used to output parentheses around the expression,
14030 but that does not work on the 386 (either ATT or BSD assembler). */
14031 output_pic_addr_const (file, XEXP (x, 0), code);
14032 break;
14034 case CONST_DOUBLE:
14035 if (GET_MODE (x) == VOIDmode)
14037 /* We can use %d if the number is <32 bits and positive. */
14038 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14039 fprintf (file, "0x%lx%08lx",
14040 (unsigned long) CONST_DOUBLE_HIGH (x),
14041 (unsigned long) CONST_DOUBLE_LOW (x));
14042 else
14043 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14045 else
14046 /* We can't handle floating point constants;
14047 TARGET_PRINT_OPERAND must handle them. */
14048 output_operand_lossage ("floating constant misused");
14049 break;
14051 case PLUS:
14052 /* Some assemblers need integer constants to appear first. */
14053 if (CONST_INT_P (XEXP (x, 0)))
14055 output_pic_addr_const (file, XEXP (x, 0), code);
14056 putc ('+', file);
14057 output_pic_addr_const (file, XEXP (x, 1), code);
14059 else
14061 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14062 output_pic_addr_const (file, XEXP (x, 1), code);
14063 putc ('+', file);
14064 output_pic_addr_const (file, XEXP (x, 0), code);
14066 break;
14068 case MINUS:
14069 if (!TARGET_MACHO)
14070 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14071 output_pic_addr_const (file, XEXP (x, 0), code);
14072 putc ('-', file);
14073 output_pic_addr_const (file, XEXP (x, 1), code);
14074 if (!TARGET_MACHO)
14075 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14076 break;
14078 case UNSPEC:
14079 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14081 bool f = i386_asm_output_addr_const_extra (file, x);
14082 gcc_assert (f);
14083 break;
14086 gcc_assert (XVECLEN (x, 0) == 1);
14087 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14088 switch (XINT (x, 1))
14090 case UNSPEC_GOT:
14091 fputs ("@GOT", file);
14092 break;
14093 case UNSPEC_GOTOFF:
14094 fputs ("@GOTOFF", file);
14095 break;
14096 case UNSPEC_PLTOFF:
14097 fputs ("@PLTOFF", file);
14098 break;
14099 case UNSPEC_PCREL:
14100 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14101 "(%rip)" : "[rip]", file);
14102 break;
14103 case UNSPEC_GOTPCREL:
14104 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14105 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14106 break;
14107 case UNSPEC_GOTTPOFF:
14108 /* FIXME: This might be @TPOFF in Sun ld too. */
14109 fputs ("@gottpoff", file);
14110 break;
14111 case UNSPEC_TPOFF:
14112 fputs ("@tpoff", file);
14113 break;
14114 case UNSPEC_NTPOFF:
14115 if (TARGET_64BIT)
14116 fputs ("@tpoff", file);
14117 else
14118 fputs ("@ntpoff", file);
14119 break;
14120 case UNSPEC_DTPOFF:
14121 fputs ("@dtpoff", file);
14122 break;
14123 case UNSPEC_GOTNTPOFF:
14124 if (TARGET_64BIT)
14125 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14126 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14127 else
14128 fputs ("@gotntpoff", file);
14129 break;
14130 case UNSPEC_INDNTPOFF:
14131 fputs ("@indntpoff", file);
14132 break;
14133 #if TARGET_MACHO
14134 case UNSPEC_MACHOPIC_OFFSET:
14135 putc ('-', file);
14136 machopic_output_function_base_name (file);
14137 break;
14138 #endif
14139 default:
14140 output_operand_lossage ("invalid UNSPEC as operand");
14141 break;
14143 break;
14145 default:
14146 output_operand_lossage ("invalid expression as operand");
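/* Sample output of the routine above, in AT&T syntax: a 32-bit GOTOFF
   reference prints as "foo@GOTOFF", a 64-bit GOT slot as
   "foo@GOTPCREL(%rip)", and the TLS unspecs as "foo@tpoff",
   "foo@ntpoff", "foo@dtpoff", "foo@gottpoff" or "foo@indntpoff",
   matching the relocation syntax the assembler expects.  */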
14150 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14151 We need to emit DTP-relative relocations. */
14153 static void ATTRIBUTE_UNUSED
14154 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14156 fputs (ASM_LONG, file);
14157 output_addr_const (file, x);
14158 fputs ("@dtpoff", file);
14159 switch (size)
14161 case 4:
14162 break;
14163 case 8:
14164 fputs (", 0", file);
14165 break;
14166 default:
14167 gcc_unreachable ();
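/* For a 4-byte entry this emits e.g. ".long x@dtpoff" (assuming
   ASM_LONG expands to ".long"); for an 8-byte entry a zero upper word
   is appended, giving ".long x@dtpoff, 0".  */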
14171 /* Return true if X is a representation of the PIC register. This copes
14172 with calls from ix86_find_base_term, where the register might have
14173 been replaced by a cselib value. */
14175 static bool
14176 ix86_pic_register_p (rtx x)
14178 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14179 return (pic_offset_table_rtx
14180 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14181 else
14182 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14185 /* Helper function for ix86_delegitimize_address.
14186 Attempt to delegitimize TLS local-exec accesses. */
14188 static rtx
14189 ix86_delegitimize_tls_address (rtx orig_x)
14191 rtx x = orig_x, unspec;
14192 struct ix86_address addr;
14194 if (!TARGET_TLS_DIRECT_SEG_REFS)
14195 return orig_x;
14196 if (MEM_P (x))
14197 x = XEXP (x, 0);
14198 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14199 return orig_x;
14200 if (ix86_decompose_address (x, &addr) == 0
14201 || addr.seg != DEFAULT_TLS_SEG_REG
14202 || addr.disp == NULL_RTX
14203 || GET_CODE (addr.disp) != CONST)
14204 return orig_x;
14205 unspec = XEXP (addr.disp, 0);
14206 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14207 unspec = XEXP (unspec, 0);
14208 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14209 return orig_x;
14210 x = XVECEXP (unspec, 0, 0);
14211 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14212 if (unspec != XEXP (addr.disp, 0))
14213 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14214 if (addr.index)
14216 rtx idx = addr.index;
14217 if (addr.scale != 1)
14218 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14219 x = gen_rtx_PLUS (Pmode, idx, x);
14221 if (addr.base)
14222 x = gen_rtx_PLUS (Pmode, addr.base, x);
14223 if (MEM_P (orig_x))
14224 x = replace_equiv_address_nv (orig_x, x);
14225 return x;
14228 /* In the name of slightly smaller debug output, and to cater to
14229 general assembler lossage, recognize PIC+GOTOFF and turn it back
14230 into a direct symbol reference.
14232 On Darwin, this is necessary to avoid a crash, because Darwin
14233 has a different PIC label for each routine but the DWARF debugging
14234 information is not associated with any particular routine, so it's
14235 necessary to remove references to the PIC label from RTL stored by
14236 the DWARF output code. */
14238 static rtx
14239 ix86_delegitimize_address (rtx x)
14241 rtx orig_x = delegitimize_mem_from_attrs (x);
14242 /* addend is NULL or some rtx if x is something+GOTOFF where
14243 something doesn't include the PIC register. */
14244 rtx addend = NULL_RTX;
14245 /* reg_addend is NULL or a multiple of some register. */
14246 rtx reg_addend = NULL_RTX;
14247 /* const_addend is NULL or a const_int. */
14248 rtx const_addend = NULL_RTX;
14249 /* This is the result, or NULL. */
14250 rtx result = NULL_RTX;
14252 x = orig_x;
14254 if (MEM_P (x))
14255 x = XEXP (x, 0);
14257 if (TARGET_64BIT)
14259 if (GET_CODE (x) == CONST
14260 && GET_CODE (XEXP (x, 0)) == PLUS
14261 && GET_MODE (XEXP (x, 0)) == Pmode
14262 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14263 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14264 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14266 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14267 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14268 if (MEM_P (orig_x))
14269 x = replace_equiv_address_nv (orig_x, x);
14270 return x;
14273 if (GET_CODE (x) == CONST
14274 && GET_CODE (XEXP (x, 0)) == UNSPEC
14275 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14276 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14277 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14279 x = XVECEXP (XEXP (x, 0), 0, 0);
14280 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14282 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14283 GET_MODE (x), 0);
14284 if (x == NULL_RTX)
14285 return orig_x;
14287 return x;
14290 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14291 return ix86_delegitimize_tls_address (orig_x);
14293 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14294 and -mcmodel=medium -fpic. */
14297 if (GET_CODE (x) != PLUS
14298 || GET_CODE (XEXP (x, 1)) != CONST)
14299 return ix86_delegitimize_tls_address (orig_x);
14301 if (ix86_pic_register_p (XEXP (x, 0)))
14302 /* %ebx + GOT/GOTOFF */
14304 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14306 /* %ebx + %reg * scale + GOT/GOTOFF */
14307 reg_addend = XEXP (x, 0);
14308 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14309 reg_addend = XEXP (reg_addend, 1);
14310 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14311 reg_addend = XEXP (reg_addend, 0);
14312 else
14314 reg_addend = NULL_RTX;
14315 addend = XEXP (x, 0);
14318 else
14319 addend = XEXP (x, 0);
14321 x = XEXP (XEXP (x, 1), 0);
14322 if (GET_CODE (x) == PLUS
14323 && CONST_INT_P (XEXP (x, 1)))
14325 const_addend = XEXP (x, 1);
14326 x = XEXP (x, 0);
14329 if (GET_CODE (x) == UNSPEC
14330 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14331 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14332 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14333 && !MEM_P (orig_x) && !addend)))
14334 result = XVECEXP (x, 0, 0);
14336 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14337 && !MEM_P (orig_x))
14338 result = XVECEXP (x, 0, 0);
14340 if (! result)
14341 return ix86_delegitimize_tls_address (orig_x);
14343 if (const_addend)
14344 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14345 if (reg_addend)
14346 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14347 if (addend)
14349 /* If the rest of the original X doesn't involve the PIC register, add
14350 addend and subtract pic_offset_table_rtx. This can happen e.g.
14351 for code like:
14352 leal (%ebx, %ecx, 4), %ecx
14354 movl foo@GOTOFF(%ecx), %edx
14355 in which case we return (%ecx - %ebx) + foo. */
14356 if (pic_offset_table_rtx)
14357 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14358 pic_offset_table_rtx),
14359 result);
14360 else
14361 return orig_x;
14363 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14365 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14366 if (result == NULL_RTX)
14367 return orig_x;
14369 return result;
14372 /* If X is a machine specific address (i.e. a symbol or label being
14373 referenced as a displacement from the GOT implemented using an
14374 UNSPEC), then return the base term. Otherwise return X. */
14377 ix86_find_base_term (rtx x)
14379 rtx term;
14381 if (TARGET_64BIT)
14383 if (GET_CODE (x) != CONST)
14384 return x;
14385 term = XEXP (x, 0);
14386 if (GET_CODE (term) == PLUS
14387 && (CONST_INT_P (XEXP (term, 1))
14388 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14389 term = XEXP (term, 0);
14390 if (GET_CODE (term) != UNSPEC
14391 || (XINT (term, 1) != UNSPEC_GOTPCREL
14392 && XINT (term, 1) != UNSPEC_PCREL))
14393 return x;
14395 return XVECEXP (term, 0, 0);
14398 return ix86_delegitimize_address (x);
14401 static void
14402 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14403 bool fp, FILE *file)
14405 const char *suffix;
14407 if (mode == CCFPmode || mode == CCFPUmode)
14409 code = ix86_fp_compare_code_to_integer (code);
14410 mode = CCmode;
14412 if (reverse)
14413 code = reverse_condition (code);
14415 switch (code)
14417 case EQ:
14418 switch (mode)
14420 case CCAmode:
14421 suffix = "a";
14422 break;
14424 case CCCmode:
14425 suffix = "c";
14426 break;
14428 case CCOmode:
14429 suffix = "o";
14430 break;
14432 case CCSmode:
14433 suffix = "s";
14434 break;
14436 default:
14437 suffix = "e";
14439 break;
14440 case NE:
14441 switch (mode)
14443 case CCAmode:
14444 suffix = "na";
14445 break;
14447 case CCCmode:
14448 suffix = "nc";
14449 break;
14451 case CCOmode:
14452 suffix = "no";
14453 break;
14455 case CCSmode:
14456 suffix = "ns";
14457 break;
14459 default:
14460 suffix = "ne";
14462 break;
14463 case GT:
14464 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14465 suffix = "g";
14466 break;
14467 case GTU:
14468 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14469 Those same assemblers have the same but opposite lossage on cmov. */
14470 if (mode == CCmode)
14471 suffix = fp ? "nbe" : "a";
14472 else
14473 gcc_unreachable ();
14474 break;
14475 case LT:
14476 switch (mode)
14478 case CCNOmode:
14479 case CCGOCmode:
14480 suffix = "s";
14481 break;
14483 case CCmode:
14484 case CCGCmode:
14485 suffix = "l";
14486 break;
14488 default:
14489 gcc_unreachable ();
14491 break;
14492 case LTU:
14493 if (mode == CCmode)
14494 suffix = "b";
14495 else if (mode == CCCmode)
14496 suffix = "c";
14497 else
14498 gcc_unreachable ();
14499 break;
14500 case GE:
14501 switch (mode)
14503 case CCNOmode:
14504 case CCGOCmode:
14505 suffix = "ns";
14506 break;
14508 case CCmode:
14509 case CCGCmode:
14510 suffix = "ge";
14511 break;
14513 default:
14514 gcc_unreachable ();
14516 break;
14517 case GEU:
14518 if (mode == CCmode)
14519 suffix = fp ? "nb" : "ae";
14520 else if (mode == CCCmode)
14521 suffix = "nc";
14522 else
14523 gcc_unreachable ();
14524 break;
14525 case LE:
14526 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14527 suffix = "le";
14528 break;
14529 case LEU:
14530 if (mode == CCmode)
14531 suffix = "be";
14532 else
14533 gcc_unreachable ();
14534 break;
14535 case UNORDERED:
14536 suffix = fp ? "u" : "p";
14537 break;
14538 case ORDERED:
14539 suffix = fp ? "nu" : "np";
14540 break;
14541 default:
14542 gcc_unreachable ();
14544 fputs (suffix, file);
14547 /* Print the name of register X to FILE based on its machine mode and number.
14548 If CODE is 'w', pretend the mode is HImode.
14549 If CODE is 'b', pretend the mode is QImode.
14550 If CODE is 'k', pretend the mode is SImode.
14551 If CODE is 'q', pretend the mode is DImode.
14552 If CODE is 'x', pretend the mode is V4SFmode.
14553 If CODE is 't', pretend the mode is V8SFmode.
14554 If CODE is 'g', pretend the mode is V16SFmode.
14555 If CODE is 'h', pretend the reg is the 'high' byte register.
14556 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14557 If CODE is 'd', duplicate the operand for an AVX instruction.
14560 void
14561 print_reg (rtx x, int code, FILE *file)
14563 const char *reg;
14564 unsigned int regno;
14565 bool duplicated = code == 'd' && TARGET_AVX;
14567 if (ASSEMBLER_DIALECT == ASM_ATT)
14568 putc ('%', file);
14570 if (x == pc_rtx)
14572 gcc_assert (TARGET_64BIT);
14573 fputs ("rip", file);
14574 return;
14577 regno = true_regnum (x);
14578 gcc_assert (regno != ARG_POINTER_REGNUM
14579 && regno != FRAME_POINTER_REGNUM
14580 && regno != FLAGS_REG
14581 && regno != FPSR_REG
14582 && regno != FPCR_REG);
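/* Reduce CODE to the operand width in bytes: 0 selects the high QImode
   register (e.g. %ah), 3 selects an x87 stack register, 16/32/64 select
   the XMM/YMM/ZMM forms, and anything else defaults to the size of the
   operand's mode.  */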
14584 if (code == 'w' || MMX_REG_P (x))
14585 code = 2;
14586 else if (code == 'b')
14587 code = 1;
14588 else if (code == 'k')
14589 code = 4;
14590 else if (code == 'q')
14591 code = 8;
14592 else if (code == 'y')
14593 code = 3;
14594 else if (code == 'h')
14595 code = 0;
14596 else if (code == 'x')
14597 code = 16;
14598 else if (code == 't')
14599 code = 32;
14600 else if (code == 'g')
14601 code = 64;
14602 else
14603 code = GET_MODE_SIZE (GET_MODE (x));
14605 /* Irritatingly, AMD extended registers use a different naming convention
14606 from the normal registers: "r%d[bwd]" */
14607 if (REX_INT_REGNO_P (regno))
14609 gcc_assert (TARGET_64BIT);
14610 putc ('r', file);
14611 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14612 switch (code)
14614 case 0:
14615 error ("extended registers have no high halves");
14616 break;
14617 case 1:
14618 putc ('b', file);
14619 break;
14620 case 2:
14621 putc ('w', file);
14622 break;
14623 case 4:
14624 putc ('d', file);
14625 break;
14626 case 8:
14627 /* no suffix */
14628 break;
14629 default:
14630 error ("unsupported operand size for extended register");
14631 break;
14633 return;
14636 reg = NULL;
14637 switch (code)
14639 case 3:
14640 if (STACK_TOP_P (x))
14642 reg = "st(0)";
14643 break;
14645 /* FALLTHRU */
14646 case 8:
14647 case 4:
14648 case 12:
14649 if (! ANY_FP_REG_P (x))
14650 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14651 /* FALLTHRU */
14652 case 16:
14653 case 2:
14654 normal:
14655 reg = hi_reg_name[regno];
14656 break;
14657 case 1:
14658 if (regno >= ARRAY_SIZE (qi_reg_name))
14659 goto normal;
14660 reg = qi_reg_name[regno];
14661 break;
14662 case 0:
14663 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14664 goto normal;
14665 reg = qi_high_reg_name[regno];
14666 break;
14667 case 32:
14668 if (SSE_REG_P (x))
14670 gcc_assert (!duplicated);
14671 putc ('y', file);
14672 fputs (hi_reg_name[regno] + 1, file);
14673 return;
14675 case 64:
14676 if (SSE_REG_P (x))
14678 gcc_assert (!duplicated);
14679 putc ('z', file);
14680 fputs (hi_reg_name[REGNO (x)] + 1, file);
14681 return;
14683 break;
14684 default:
14685 gcc_unreachable ();
14688 fputs (reg, file);
14689 if (duplicated)
14691 if (ASSEMBLER_DIALECT == ASM_ATT)
14692 fprintf (file, ", %%%s", reg);
14693 else
14694 fprintf (file, ", %s", reg);
14698 /* Locate some local-dynamic symbol still in use by this function
14699 so that we can print its name in some tls_local_dynamic_base
14700 pattern. */
14702 static int
14703 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14705 rtx x = *px;
14707 if (GET_CODE (x) == SYMBOL_REF
14708 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14710 cfun->machine->some_ld_name = XSTR (x, 0);
14711 return 1;
14714 return 0;
14717 static const char *
14718 get_some_local_dynamic_name (void)
14720 rtx insn;
14722 if (cfun->machine->some_ld_name)
14723 return cfun->machine->some_ld_name;
14725 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14726 if (NONDEBUG_INSN_P (insn)
14727 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14728 return cfun->machine->some_ld_name;
14730 return NULL;
14733 /* Meaning of CODE:
14734 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14735 C -- print opcode suffix for set/cmov insn.
14736 c -- like C, but print reversed condition
14737 F,f -- likewise, but for floating-point.
14738 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14739 otherwise nothing
14740 R -- print embedded rounding and sae.
14741 r -- print only sae.
14742 z -- print the opcode suffix for the size of the current operand.
14743 Z -- likewise, with special suffixes for x87 instructions.
14744 * -- print a star (in certain assembler syntax)
14745 A -- print an absolute memory reference.
14746 E -- print address with DImode register names if TARGET_64BIT.
14747 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14748 s -- print a shift double count, followed by the assembler's argument
14749 delimiter.
14750 b -- print the QImode name of the register for the indicated operand.
14751 %b0 would print %al if operands[0] is reg 0.
14752 w -- likewise, print the HImode name of the register.
14753 k -- likewise, print the SImode name of the register.
14754 q -- likewise, print the DImode name of the register.
14755 x -- likewise, print the V4SFmode name of the register.
14756 t -- likewise, print the V8SFmode name of the register.
14757 g -- likewise, print the V16SFmode name of the register.
14758 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14759 y -- print "st(0)" instead of "st" as a register.
14760 d -- print duplicated register operand for AVX instruction.
14761 D -- print condition for SSE cmp instruction.
14762 P -- if PIC, print an @PLT suffix.
14763 p -- print raw symbol name.
14764 X -- don't print any sort of PIC '@' suffix for a symbol.
14765 & -- print some in-use local-dynamic symbol name.
14766 H -- print a memory address offset by 8; used for sse high-parts
14767 Y -- print condition for XOP pcom* instruction.
14768 + -- print a branch hint as 'cs' or 'ds' prefix
14769 ; -- print a semicolon (after prefixes due to a bug in older gas).
14770 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14771 @ -- print a segment register of thread base pointer load
14772 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14775 void
14776 ix86_print_operand (FILE *file, rtx x, int code)
14778 if (code)
14780 switch (code)
14782 case 'A':
14783 switch (ASSEMBLER_DIALECT)
14785 case ASM_ATT:
14786 putc ('*', file);
14787 break;
14789 case ASM_INTEL:
14790 /* Intel syntax. For absolute addresses, registers should not
14791 be surrounded by brackets. */
14792 if (!REG_P (x))
14794 putc ('[', file);
14795 ix86_print_operand (file, x, 0);
14796 putc (']', file);
14797 return;
14799 break;
14801 default:
14802 gcc_unreachable ();
14805 ix86_print_operand (file, x, 0);
14806 return;
14808 case 'E':
14809 /* Wrap address in an UNSPEC to declare special handling. */
14810 if (TARGET_64BIT)
14811 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14813 output_address (x);
14814 return;
14816 case 'L':
14817 if (ASSEMBLER_DIALECT == ASM_ATT)
14818 putc ('l', file);
14819 return;
14821 case 'W':
14822 if (ASSEMBLER_DIALECT == ASM_ATT)
14823 putc ('w', file);
14824 return;
14826 case 'B':
14827 if (ASSEMBLER_DIALECT == ASM_ATT)
14828 putc ('b', file);
14829 return;
14831 case 'Q':
14832 if (ASSEMBLER_DIALECT == ASM_ATT)
14833 putc ('l', file);
14834 return;
14836 case 'S':
14837 if (ASSEMBLER_DIALECT == ASM_ATT)
14838 putc ('s', file);
14839 return;
14841 case 'T':
14842 if (ASSEMBLER_DIALECT == ASM_ATT)
14843 putc ('t', file);
14844 return;
14846 case 'O':
14847 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14848 if (ASSEMBLER_DIALECT != ASM_ATT)
14849 return;
14851 switch (GET_MODE_SIZE (GET_MODE (x)))
14853 case 2:
14854 putc ('w', file);
14855 break;
14857 case 4:
14858 putc ('l', file);
14859 break;
14861 case 8:
14862 putc ('q', file);
14863 break;
14865 default:
14866 output_operand_lossage
14867 ("invalid operand size for operand code 'O'");
14868 return;
14871 putc ('.', file);
14872 #endif
14873 return;
14875 case 'z':
14876 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14878 /* Opcodes don't get size suffixes if using Intel opcodes. */
14879 if (ASSEMBLER_DIALECT == ASM_INTEL)
14880 return;
14882 switch (GET_MODE_SIZE (GET_MODE (x)))
14884 case 1:
14885 putc ('b', file);
14886 return;
14888 case 2:
14889 putc ('w', file);
14890 return;
14892 case 4:
14893 putc ('l', file);
14894 return;
14896 case 8:
14897 putc ('q', file);
14898 return;
14900 default:
14901 output_operand_lossage
14902 ("invalid operand size for operand code 'z'");
14903 return;
14907 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14908 warning
14909 (0, "non-integer operand used with operand code 'z'");
14910 /* FALLTHRU */
14912 case 'Z':
14913 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14914 if (ASSEMBLER_DIALECT == ASM_INTEL)
14915 return;
14917 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14919 switch (GET_MODE_SIZE (GET_MODE (x)))
14921 case 2:
14922 #ifdef HAVE_AS_IX86_FILDS
14923 putc ('s', file);
14924 #endif
14925 return;
14927 case 4:
14928 putc ('l', file);
14929 return;
14931 case 8:
14932 #ifdef HAVE_AS_IX86_FILDQ
14933 putc ('q', file);
14934 #else
14935 fputs ("ll", file);
14936 #endif
14937 return;
14939 default:
14940 break;
14943 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14945 /* 387 opcodes don't get size suffixes
14946 if the operands are registers. */
14947 if (STACK_REG_P (x))
14948 return;
14950 switch (GET_MODE_SIZE (GET_MODE (x)))
14952 case 4:
14953 putc ('s', file);
14954 return;
14956 case 8:
14957 putc ('l', file);
14958 return;
14960 case 12:
14961 case 16:
14962 putc ('t', file);
14963 return;
14965 default:
14966 break;
14969 else
14971 output_operand_lossage
14972 ("invalid operand type used with operand code 'Z'");
14973 return;
14976 output_operand_lossage
14977 ("invalid operand size for operand code 'Z'");
14978 return;
14980 case 'd':
14981 case 'b':
14982 case 'w':
14983 case 'k':
14984 case 'q':
14985 case 'h':
14986 case 't':
14987 case 'g':
14988 case 'y':
14989 case 'x':
14990 case 'X':
14991 case 'P':
14992 case 'p':
14993 break;
14995 case 's':
14996 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14998 ix86_print_operand (file, x, 0);
14999 fputs (", ", file);
15001 return;
15003 case 'Y':
15004 switch (GET_CODE (x))
15006 case NE:
15007 fputs ("neq", file);
15008 break;
15009 case EQ:
15010 fputs ("eq", file);
15011 break;
15012 case GE:
15013 case GEU:
15014 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15015 break;
15016 case GT:
15017 case GTU:
15018 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15019 break;
15020 case LE:
15021 case LEU:
15022 fputs ("le", file);
15023 break;
15024 case LT:
15025 case LTU:
15026 fputs ("lt", file);
15027 break;
15028 case UNORDERED:
15029 fputs ("unord", file);
15030 break;
15031 case ORDERED:
15032 fputs ("ord", file);
15033 break;
15034 case UNEQ:
15035 fputs ("ueq", file);
15036 break;
15037 case UNGE:
15038 fputs ("nlt", file);
15039 break;
15040 case UNGT:
15041 fputs ("nle", file);
15042 break;
15043 case UNLE:
15044 fputs ("ule", file);
15045 break;
15046 case UNLT:
15047 fputs ("ult", file);
15048 break;
15049 case LTGT:
15050 fputs ("une", file);
15051 break;
15052 default:
15053 output_operand_lossage ("operand is not a condition code, "
15054 "invalid operand code 'Y'");
15055 return;
15057 return;
15059 case 'D':
15060 /* Little bit of braindamage here. The SSE compare instructions
15061 use completely different names for the comparisons than the
15062 fp conditional moves do. */
15063 switch (GET_CODE (x))
15065 case UNEQ:
15066 if (TARGET_AVX)
15068 fputs ("eq_us", file);
15069 break;
15071 case EQ:
15072 fputs ("eq", file);
15073 break;
15074 case UNLT:
15075 if (TARGET_AVX)
15077 fputs ("nge", file);
15078 break;
15080 case LT:
15081 fputs ("lt", file);
15082 break;
15083 case UNLE:
15084 if (TARGET_AVX)
15086 fputs ("ngt", file);
15087 break;
15089 case LE:
15090 fputs ("le", file);
15091 break;
15092 case UNORDERED:
15093 fputs ("unord", file);
15094 break;
15095 case LTGT:
15096 if (TARGET_AVX)
15098 fputs ("neq_oq", file);
15099 break;
15101 case NE:
15102 fputs ("neq", file);
15103 break;
15104 case GE:
15105 if (TARGET_AVX)
15107 fputs ("ge", file);
15108 break;
15110 case UNGE:
15111 fputs ("nlt", file);
15112 break;
15113 case GT:
15114 if (TARGET_AVX)
15116 fputs ("gt", file);
15117 break;
15119 case UNGT:
15120 fputs ("nle", file);
15121 break;
15122 case ORDERED:
15123 fputs ("ord", file);
15124 break;
15125 default:
15126 output_operand_lossage ("operand is not a condition code, "
15127 "invalid operand code 'D'");
15128 return;
15130 return;
15132 case 'F':
15133 case 'f':
15134 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15135 if (ASSEMBLER_DIALECT == ASM_ATT)
15136 putc ('.', file);
15137 #endif
15139 case 'C':
15140 case 'c':
15141 if (!COMPARISON_P (x))
15143 output_operand_lossage ("operand is not a condition code, "
15144 "invalid operand code '%c'", code);
15145 return;
15147 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15148 code == 'c' || code == 'f',
15149 code == 'F' || code == 'f',
15150 file);
15151 return;
15153 case 'H':
15154 if (!offsettable_memref_p (x))
15156 output_operand_lossage ("operand is not an offsettable memory "
15157 "reference, invalid operand code 'H'");
15158 return;
15160 /* It doesn't actually matter what mode we use here, as we're
15161 only going to use this for printing. */
15162 x = adjust_address_nv (x, DImode, 8);
15163 /* Output 'qword ptr' for intel assembler dialect. */
15164 if (ASSEMBLER_DIALECT == ASM_INTEL)
15165 code = 'q';
15166 break;
15168 case 'K':
15169 gcc_assert (CONST_INT_P (x));
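/* Emit an HLE prefix.  If the assembler does not understand the
   xacquire/xrelease mnemonics, emit the raw prefix bytes instead
   (0xf2 for xacquire, 0xf3 for xrelease).  */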
15171 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15172 #ifdef HAVE_AS_IX86_HLE
15173 fputs ("xacquire ", file);
15174 #else
15175 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15176 #endif
15177 else if (INTVAL (x) & IX86_HLE_RELEASE)
15178 #ifdef HAVE_AS_IX86_HLE
15179 fputs ("xrelease ", file);
15180 #else
15181 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15182 #endif
15183 /* We do not want to print the value of the operand. */
15184 return;
15186 case 'N':
15187 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15188 fputs ("{z}", file);
15189 return;
15191 case 'r':
15192 gcc_assert (CONST_INT_P (x));
15193 gcc_assert (INTVAL (x) == ROUND_SAE);
15195 if (ASSEMBLER_DIALECT == ASM_INTEL)
15196 fputs (", ", file);
15198 fputs ("{sae}", file);
15200 if (ASSEMBLER_DIALECT == ASM_ATT)
15201 fputs (", ", file);
15203 return;
15205 case 'R':
15206 gcc_assert (CONST_INT_P (x));
15208 if (ASSEMBLER_DIALECT == ASM_INTEL)
15209 fputs (", ", file);
15211 switch (INTVAL (x))
15213 case ROUND_NEAREST_INT | ROUND_SAE:
15214 fputs ("{rn-sae}", file);
15215 break;
15216 case ROUND_NEG_INF | ROUND_SAE:
15217 fputs ("{rd-sae}", file);
15218 break;
15219 case ROUND_POS_INF | ROUND_SAE:
15220 fputs ("{ru-sae}", file);
15221 break;
15222 case ROUND_ZERO | ROUND_SAE:
15223 fputs ("{rz-sae}", file);
15224 break;
15225 default:
15226 gcc_unreachable ();
15229 if (ASSEMBLER_DIALECT == ASM_ATT)
15230 fputs (", ", file);
15232 return;
15234 case '*':
15235 if (ASSEMBLER_DIALECT == ASM_ATT)
15236 putc ('*', file);
15237 return;
15239 case '&':
15241 const char *name = get_some_local_dynamic_name ();
15242 if (name == NULL)
15243 output_operand_lossage ("'%%&' used without any "
15244 "local dynamic TLS references");
15245 else
15246 assemble_name (file, name);
15247 return;
15250 case '+':
15252 rtx x;
15254 if (!optimize
15255 || optimize_function_for_size_p (cfun)
15256 || !TARGET_BRANCH_PREDICTION_HINTS)
15257 return;
15259 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15260 if (x)
15262 int pred_val = XINT (x, 0);
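/* Only act on predictions that are reasonably strong, i.e. outside
   the 45%-55% probability band.  */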
15264 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15265 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15267 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15268 bool cputaken
15269 = final_forward_branch_p (current_output_insn) == 0;
15271 /* Emit hints only in the case where the default branch prediction
15272 heuristics would fail. */
15273 if (taken != cputaken)
15275 /* We use 3e (DS) prefix for taken branches and
15276 2e (CS) prefix for not taken branches. */
15277 if (taken)
15278 fputs ("ds ; ", file);
15279 else
15280 fputs ("cs ; ", file);
15284 return;
15287 case ';':
15288 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15289 putc (';', file);
15290 #endif
15291 return;
15293 case '@':
15294 if (ASSEMBLER_DIALECT == ASM_ATT)
15295 putc ('%', file);
15297 /* The kernel uses a different segment register for performance
15298 reasons: this way a system call does not have to trash the userspace
15299 segment register, which would be expensive. */
15300 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15301 fputs ("fs", file);
15302 else
15303 fputs ("gs", file);
15304 return;
15306 case '~':
15307 putc (TARGET_AVX2 ? 'i' : 'f', file);
15308 return;
15310 case '^':
15311 if (TARGET_64BIT && Pmode != word_mode)
15312 fputs ("addr32 ", file);
15313 return;
15315 default:
15316 output_operand_lossage ("invalid operand code '%c'", code);
15320 if (REG_P (x))
15321 print_reg (x, code, file);
15323 else if (MEM_P (x))
15325 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15326 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15327 && GET_MODE (x) != BLKmode)
15329 const char * size;
15330 switch (GET_MODE_SIZE (GET_MODE (x)))
15332 case 1: size = "BYTE"; break;
15333 case 2: size = "WORD"; break;
15334 case 4: size = "DWORD"; break;
15335 case 8: size = "QWORD"; break;
15336 case 12: size = "TBYTE"; break;
15337 case 16:
15338 if (GET_MODE (x) == XFmode)
15339 size = "TBYTE";
15340 else
15341 size = "XMMWORD";
15342 break;
15343 case 32: size = "YMMWORD"; break;
15344 case 64: size = "ZMMWORD"; break;
15345 default:
15346 gcc_unreachable ();
15349 /* Check for explicit size override (codes 'b', 'w', 'k',
15350 'q' and 'x') */
15351 if (code == 'b')
15352 size = "BYTE";
15353 else if (code == 'w')
15354 size = "WORD";
15355 else if (code == 'k')
15356 size = "DWORD";
15357 else if (code == 'q')
15358 size = "QWORD";
15359 else if (code == 'x')
15360 size = "XMMWORD";
15362 fputs (size, file);
15363 fputs (" PTR ", file);
15366 x = XEXP (x, 0);
15367 /* Avoid (%rip) for call operands. */
15368 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15369 && !CONST_INT_P (x))
15370 output_addr_const (file, x);
15371 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15372 output_operand_lossage ("invalid constraints for operand");
15373 else
15374 output_address (x);
15377 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15379 REAL_VALUE_TYPE r;
15380 long l;
15382 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15383 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15385 if (ASSEMBLER_DIALECT == ASM_ATT)
15386 putc ('$', file);
15387 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15388 if (code == 'q')
15389 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15390 (unsigned long long) (int) l);
15391 else
15392 fprintf (file, "0x%08x", (unsigned int) l);
15395 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15397 REAL_VALUE_TYPE r;
15398 long l[2];
15400 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15401 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15403 if (ASSEMBLER_DIALECT == ASM_ATT)
15404 putc ('$', file);
15405 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15408 /* These float cases don't actually occur as immediate operands. */
15409 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15411 char dstr[30];
15413 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15414 fputs (dstr, file);
15417 else
15419 /* We have patterns that allow zero sets of memory, for instance.
15420 In 64-bit mode, we should probably support all 8-byte vectors,
15421 since we can in fact encode that into an immediate. */
15422 if (GET_CODE (x) == CONST_VECTOR)
15424 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15425 x = const0_rtx;
15428 if (code != 'P' && code != 'p')
15430 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15432 if (ASSEMBLER_DIALECT == ASM_ATT)
15433 putc ('$', file);
15435 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15436 || GET_CODE (x) == LABEL_REF)
15438 if (ASSEMBLER_DIALECT == ASM_ATT)
15439 putc ('$', file);
15440 else
15441 fputs ("OFFSET FLAT:", file);
15444 if (CONST_INT_P (x))
15445 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15446 else if (flag_pic || MACHOPIC_INDIRECT)
15447 output_pic_addr_const (file, x, code);
15448 else
15449 output_addr_const (file, x);
15453 static bool
15454 ix86_print_operand_punct_valid_p (unsigned char code)
15456 return (code == '@' || code == '*' || code == '+' || code == '&'
15457 || code == ';' || code == '~' || code == '^');
15460 /* Print a memory operand whose address is ADDR. */
15462 static void
15463 ix86_print_operand_address (FILE *file, rtx addr)
15465 struct ix86_address parts;
15466 rtx base, index, disp;
15467 int scale;
15468 int ok;
15469 bool vsib = false;
15470 int code = 0;
15472 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15474 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15475 gcc_assert (parts.index == NULL_RTX);
15476 parts.index = XVECEXP (addr, 0, 1);
15477 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15478 addr = XVECEXP (addr, 0, 0);
15479 vsib = true;
15481 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15483 gcc_assert (TARGET_64BIT);
15484 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15485 code = 'q';
15487 else
15488 ok = ix86_decompose_address (addr, &parts);
15490 gcc_assert (ok);
15492 base = parts.base;
15493 index = parts.index;
15494 disp = parts.disp;
15495 scale = parts.scale;
15497 switch (parts.seg)
15499 case SEG_DEFAULT:
15500 break;
15501 case SEG_FS:
15502 case SEG_GS:
15503 if (ASSEMBLER_DIALECT == ASM_ATT)
15504 putc ('%', file);
15505 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15506 break;
15507 default:
15508 gcc_unreachable ();
15511 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
15512 if (TARGET_64BIT && !base && !index)
15514 rtx symbol = disp;
15516 if (GET_CODE (disp) == CONST
15517 && GET_CODE (XEXP (disp, 0)) == PLUS
15518 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15519 symbol = XEXP (XEXP (disp, 0), 0);
15521 if (GET_CODE (symbol) == LABEL_REF
15522 || (GET_CODE (symbol) == SYMBOL_REF
15523 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15524 base = pc_rtx;
15526 if (!base && !index)
15528 /* A displacement-only address requires special attention. */
15530 if (CONST_INT_P (disp))
15532 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15533 fputs ("ds:", file);
15534 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15536 else if (flag_pic)
15537 output_pic_addr_const (file, disp, 0);
15538 else
15539 output_addr_const (file, disp);
15541 else
15543 /* Print SImode register names to force the addr32 prefix. */
15544 if (SImode_address_operand (addr, VOIDmode))
15546 #ifdef ENABLE_CHECKING
15547 gcc_assert (TARGET_64BIT);
15548 switch (GET_CODE (addr))
15550 case SUBREG:
15551 gcc_assert (GET_MODE (addr) == SImode);
15552 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15553 break;
15554 case ZERO_EXTEND:
15555 case AND:
15556 gcc_assert (GET_MODE (addr) == DImode);
15557 break;
15558 default:
15559 gcc_unreachable ();
15561 #endif
15562 gcc_assert (!code);
15563 code = 'k';
15565 else if (code == 0
15566 && TARGET_X32
15567 && disp
15568 && CONST_INT_P (disp)
15569 && INTVAL (disp) < -16*1024*1024)
15571 /* X32 runs in 64-bit mode, where displacement, DISP, in
15572 address DISP(%r64), is encoded as 32-bit immediate sign-
15573 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15574 address is %r64 + 0xffffffffbffffd00. When %r64 <
15575 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15576 which is invalid for x32. The correct address is %r64
15577 - 0x40000300 == 0xf7ffdd64. To properly encode
15578 -0x40000300(%r64) for x32, we zero-extend negative
15579 displacement by forcing addr32 prefix which truncates
15580 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15581 zero-extend all negative displacements, including -1(%rsp).
15582 However, for small negative displacements, sign-extension
15583 won't cause overflow. We only zero-extend negative
15584 displacements if they are < -16*1024*1024, which is also the
15585 limit used to check legitimate address displacements for PIC. */
15586 code = 'k';
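/* Now print the address itself: AT&T syntax is disp(base,index,scale),
   Intel syntax is [base+index*scale+disp].  */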
15589 if (ASSEMBLER_DIALECT == ASM_ATT)
15591 if (disp)
15593 if (flag_pic)
15594 output_pic_addr_const (file, disp, 0);
15595 else if (GET_CODE (disp) == LABEL_REF)
15596 output_asm_label (disp);
15597 else
15598 output_addr_const (file, disp);
15601 putc ('(', file);
15602 if (base)
15603 print_reg (base, code, file);
15604 if (index)
15606 putc (',', file);
15607 print_reg (index, vsib ? 0 : code, file);
15608 if (scale != 1 || vsib)
15609 fprintf (file, ",%d", scale);
15611 putc (')', file);
15613 else
15615 rtx offset = NULL_RTX;
15617 if (disp)
15619 /* Pull out the offset of a symbol; print any symbol itself. */
15620 if (GET_CODE (disp) == CONST
15621 && GET_CODE (XEXP (disp, 0)) == PLUS
15622 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15624 offset = XEXP (XEXP (disp, 0), 1);
15625 disp = gen_rtx_CONST (VOIDmode,
15626 XEXP (XEXP (disp, 0), 0));
15629 if (flag_pic)
15630 output_pic_addr_const (file, disp, 0);
15631 else if (GET_CODE (disp) == LABEL_REF)
15632 output_asm_label (disp);
15633 else if (CONST_INT_P (disp))
15634 offset = disp;
15635 else
15636 output_addr_const (file, disp);
15639 putc ('[', file);
15640 if (base)
15642 print_reg (base, code, file);
15643 if (offset)
15645 if (INTVAL (offset) >= 0)
15646 putc ('+', file);
15647 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15650 else if (offset)
15651 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15652 else
15653 putc ('0', file);
15655 if (index)
15657 putc ('+', file);
15658 print_reg (index, vsib ? 0 : code, file);
15659 if (scale != 1 || vsib)
15660 fprintf (file, "*%d", scale);
15662 putc (']', file);
15667 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15669 static bool
15670 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15672 rtx op;
15674 if (GET_CODE (x) != UNSPEC)
15675 return false;
15677 op = XVECEXP (x, 0, 0);
15678 switch (XINT (x, 1))
15680 case UNSPEC_GOTTPOFF:
15681 output_addr_const (file, op);
15682 /* FIXME: This might be @TPOFF in Sun ld. */
15683 fputs ("@gottpoff", file);
15684 break;
15685 case UNSPEC_TPOFF:
15686 output_addr_const (file, op);
15687 fputs ("@tpoff", file);
15688 break;
15689 case UNSPEC_NTPOFF:
15690 output_addr_const (file, op);
15691 if (TARGET_64BIT)
15692 fputs ("@tpoff", file);
15693 else
15694 fputs ("@ntpoff", file);
15695 break;
15696 case UNSPEC_DTPOFF:
15697 output_addr_const (file, op);
15698 fputs ("@dtpoff", file);
15699 break;
15700 case UNSPEC_GOTNTPOFF:
15701 output_addr_const (file, op);
15702 if (TARGET_64BIT)
15703 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15704 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15705 else
15706 fputs ("@gotntpoff", file);
15707 break;
15708 case UNSPEC_INDNTPOFF:
15709 output_addr_const (file, op);
15710 fputs ("@indntpoff", file);
15711 break;
15712 #if TARGET_MACHO
15713 case UNSPEC_MACHOPIC_OFFSET:
15714 output_addr_const (file, op);
15715 putc ('-', file);
15716 machopic_output_function_base_name (file);
15717 break;
15718 #endif
15720 case UNSPEC_STACK_CHECK:
15722 int offset;
15724 gcc_assert (flag_split_stack);
15726 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15727 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15728 #else
15729 gcc_unreachable ();
15730 #endif
15732 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15734 break;
15736 default:
15737 return false;
15740 return true;
15743 /* Split one or more double-mode RTL references into pairs of half-mode
15744 references. The RTL can be REG, offsettable MEM, integer constant, or
15745 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15746 split and "num" is its length. lo_half and hi_half are output arrays
15747 that parallel "operands". */
15749 void
15750 split_double_mode (enum machine_mode mode, rtx operands[],
15751 int num, rtx lo_half[], rtx hi_half[])
15753 enum machine_mode half_mode;
15754 unsigned int byte;
15756 switch (mode)
15758 case TImode:
15759 half_mode = DImode;
15760 break;
15761 case DImode:
15762 half_mode = SImode;
15763 break;
15764 default:
15765 gcc_unreachable ();
15768 byte = GET_MODE_SIZE (half_mode);
15770 while (num--)
15772 rtx op = operands[num];
15774 /* simplify_subreg refuses to split volatile memory addresses,
15775 but we still have to handle them. */
15776 if (MEM_P (op))
15778 lo_half[num] = adjust_address (op, half_mode, 0);
15779 hi_half[num] = adjust_address (op, half_mode, byte);
15781 else
15783 lo_half[num] = simplify_gen_subreg (half_mode, op,
15784 GET_MODE (op) == VOIDmode
15785 ? mode : GET_MODE (op), 0);
15786 hi_half[num] = simplify_gen_subreg (half_mode, op,
15787 GET_MODE (op) == VOIDmode
15788 ? mode : GET_MODE (op), byte);
15793 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15794 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15795 is the expression of the binary operation. The output may either be
15796 emitted here, or returned to the caller, like all output_* functions.
15798 There is no guarantee that the operands are the same mode, as they
15799 might be within FLOAT or FLOAT_EXTEND expressions. */
15801 #ifndef SYSV386_COMPAT
15802 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15803 wants to fix the assemblers because that causes incompatibility
15804 with gcc. No-one wants to fix gcc because that causes
15805 incompatibility with assemblers... You can use the option of
15806 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15807 #define SYSV386_COMPAT 1
15808 #endif
15810 const char *
15811 output_387_binary_op (rtx insn, rtx *operands)
15813 static char buf[40];
15814 const char *p;
15815 const char *ssep;
15816 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15818 #ifdef ENABLE_CHECKING
15819 /* Even if we do not want to check the inputs, this documents input
15820 constraints. Which helps in understanding the following code. */
15821 if (STACK_REG_P (operands[0])
15822 && ((REG_P (operands[1])
15823 && REGNO (operands[0]) == REGNO (operands[1])
15824 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15825 || (REG_P (operands[2])
15826 && REGNO (operands[0]) == REGNO (operands[2])
15827 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15828 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15829 ; /* ok */
15830 else
15831 gcc_assert (is_sse);
15832 #endif
15834 switch (GET_CODE (operands[3]))
15836 case PLUS:
15837 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15838 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15839 p = "fiadd";
15840 else
15841 p = "fadd";
15842 ssep = "vadd";
15843 break;
15845 case MINUS:
15846 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15847 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15848 p = "fisub";
15849 else
15850 p = "fsub";
15851 ssep = "vsub";
15852 break;
15854 case MULT:
15855 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15856 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15857 p = "fimul";
15858 else
15859 p = "fmul";
15860 ssep = "vmul";
15861 break;
15863 case DIV:
15864 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15865 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15866 p = "fidiv";
15867 else
15868 p = "fdiv";
15869 ssep = "vdiv";
15870 break;
15872 default:
15873 gcc_unreachable ();
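/* For SSE operands build the scalar mnemonic: with AVX use the
   three-operand "v"-prefixed form, otherwise drop the leading 'v'
   (ssep + 1) and use the two-operand form; the ss/sd suffix is chosen
   from the mode of operand 0.  */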
15876 if (is_sse)
15878 if (TARGET_AVX)
15880 strcpy (buf, ssep);
15881 if (GET_MODE (operands[0]) == SFmode)
15882 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15883 else
15884 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15886 else
15888 strcpy (buf, ssep + 1);
15889 if (GET_MODE (operands[0]) == SFmode)
15890 strcat (buf, "ss\t{%2, %0|%0, %2}");
15891 else
15892 strcat (buf, "sd\t{%2, %0|%0, %2}");
15894 return buf;
15896 strcpy (buf, p);
15898 switch (GET_CODE (operands[3]))
15900 case MULT:
15901 case PLUS:
15902 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15904 rtx temp = operands[2];
15905 operands[2] = operands[1];
15906 operands[1] = temp;
15909 /* We now know operands[0] == operands[1]. */
15911 if (MEM_P (operands[2]))
15913 p = "%Z2\t%2";
15914 break;
15917 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15919 if (STACK_TOP_P (operands[0]))
15920 /* How is it that we are storing to a dead operand[2]?
15921 Well, presumably operands[1] is dead too. We can't
15922 store the result to st(0) as st(0) gets popped on this
15923 instruction. Instead store to operands[2] (which I
15924 think has to be st(1)). st(1) will be popped later.
15925 gcc <= 2.8.1 didn't have this check and generated
15926 assembly code that the Unixware assembler rejected. */
15927 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15928 else
15929 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15930 break;
15933 if (STACK_TOP_P (operands[0]))
15934 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15935 else
15936 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15937 break;
15939 case MINUS:
15940 case DIV:
15941 if (MEM_P (operands[1]))
15943 p = "r%Z1\t%1";
15944 break;
15947 if (MEM_P (operands[2]))
15949 p = "%Z2\t%2";
15950 break;
15953 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15955 #if SYSV386_COMPAT
15956 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15957 derived assemblers, confusingly reverse the direction of
15958 the operation for fsub{r} and fdiv{r} when the
15959 destination register is not st(0). The Intel assembler
15960 doesn't have this brain damage. Read !SYSV386_COMPAT to
15961 figure out what the hardware really does. */
15962 if (STACK_TOP_P (operands[0]))
15963 p = "{p\t%0, %2|rp\t%2, %0}";
15964 else
15965 p = "{rp\t%2, %0|p\t%0, %2}";
15966 #else
15967 if (STACK_TOP_P (operands[0]))
15968 /* As above for fmul/fadd, we can't store to st(0). */
15969 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15970 else
15971 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15972 #endif
15973 break;
15976 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15978 #if SYSV386_COMPAT
15979 if (STACK_TOP_P (operands[0]))
15980 p = "{rp\t%0, %1|p\t%1, %0}";
15981 else
15982 p = "{p\t%1, %0|rp\t%0, %1}";
15983 #else
15984 if (STACK_TOP_P (operands[0]))
15985 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15986 else
15987 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15988 #endif
15989 break;
15992 if (STACK_TOP_P (operands[0]))
15994 if (STACK_TOP_P (operands[1]))
15995 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15996 else
15997 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15998 break;
16000 else if (STACK_TOP_P (operands[1]))
16002 #if SYSV386_COMPAT
16003 p = "{\t%1, %0|r\t%0, %1}";
16004 #else
16005 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16006 #endif
16008 else
16010 #if SYSV386_COMPAT
16011 p = "{r\t%2, %0|\t%0, %2}";
16012 #else
16013 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16014 #endif
16016 break;
16018 default:
16019 gcc_unreachable ();
16022 strcat (buf, p);
16023 return buf;
16026 /* Check if a 256bit AVX register is referenced inside of EXP. */
16028 static int
16029 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16031 rtx exp = *pexp;
16033 if (GET_CODE (exp) == SUBREG)
16034 exp = SUBREG_REG (exp);
16036 if (REG_P (exp)
16037 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16038 return 1;
16040 return 0;
16043 /* Return needed mode for entity in optimize_mode_switching pass. */
16045 static int
16046 ix86_avx_u128_mode_needed (rtx insn)
16048 if (CALL_P (insn))
16050 rtx link;
16052 /* Needed mode is set to AVX_U128_CLEAN if there are
16053 no 256bit modes used in function arguments. */
16054 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16055 link;
16056 link = XEXP (link, 1))
16058 if (GET_CODE (XEXP (link, 0)) == USE)
16060 rtx arg = XEXP (XEXP (link, 0), 0);
16062 if (ix86_check_avx256_register (&arg, NULL))
16063 return AVX_U128_DIRTY;
16067 return AVX_U128_CLEAN;
16070 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
16071 changes state only when a 256bit register is written to, but we need
16072 to prevent the compiler from moving the optimal insertion point above
16073 an eventual read from a 256bit register. */
16074 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16075 return AVX_U128_DIRTY;
16077 return AVX_U128_ANY;
16080 /* Return mode that i387 must be switched into
16081 prior to the execution of insn. */
16083 static int
16084 ix86_i387_mode_needed (int entity, rtx insn)
16086 enum attr_i387_cw mode;
16088 /* The mode UNINITIALIZED is used to store the control word after a
16089 function call or ASM pattern. The mode ANY specifies that the function
16090 has no requirements on the control word and makes no changes in the
16091 bits we are interested in. */
16093 if (CALL_P (insn)
16094 || (NONJUMP_INSN_P (insn)
16095 && (asm_noperands (PATTERN (insn)) >= 0
16096 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16097 return I387_CW_UNINITIALIZED;
16099 if (recog_memoized (insn) < 0)
16100 return I387_CW_ANY;
16102 mode = get_attr_i387_cw (insn);
16104 switch (entity)
16106 case I387_TRUNC:
16107 if (mode == I387_CW_TRUNC)
16108 return mode;
16109 break;
16111 case I387_FLOOR:
16112 if (mode == I387_CW_FLOOR)
16113 return mode;
16114 break;
16116 case I387_CEIL:
16117 if (mode == I387_CW_CEIL)
16118 return mode;
16119 break;
16121 case I387_MASK_PM:
16122 if (mode == I387_CW_MASK_PM)
16123 return mode;
16124 break;
16126 default:
16127 gcc_unreachable ();
16130 return I387_CW_ANY;
16133 /* Return mode that entity must be switched into
16134 prior to the execution of insn. */
16137 ix86_mode_needed (int entity, rtx insn)
16139 switch (entity)
16141 case AVX_U128:
16142 return ix86_avx_u128_mode_needed (insn);
16143 case I387_TRUNC:
16144 case I387_FLOOR:
16145 case I387_CEIL:
16146 case I387_MASK_PM:
16147 return ix86_i387_mode_needed (entity, insn);
16148 default:
16149 gcc_unreachable ();
16151 return 0;
16154 /* Check if a 256bit AVX register is referenced in stores. */
16156 static void
16157 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16159 if (ix86_check_avx256_register (&dest, NULL))
16161 bool *used = (bool *) data;
16162 *used = true;
16166 /* Calculate mode of upper 128bit AVX registers after the insn. */
16168 static int
16169 ix86_avx_u128_mode_after (int mode, rtx insn)
16171 rtx pat = PATTERN (insn);
16173 if (vzeroupper_operation (pat, VOIDmode)
16174 || vzeroall_operation (pat, VOIDmode))
16175 return AVX_U128_CLEAN;
16177 /* We know that the state is clean after a CALL insn if there are no
16178 256bit modes used in the function return register. */
16179 if (CALL_P (insn))
16181 bool avx_reg256_found = false;
16182 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16184 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16187 /* Otherwise, return the current mode. Remember that if the insn
16188 references AVX 256bit registers, the mode was already changed
16189 to DIRTY from MODE_NEEDED. */
16190 return mode;
16193 /* Return the mode that an insn results in. */
16196 ix86_mode_after (int entity, int mode, rtx insn)
16198 switch (entity)
16200 case AVX_U128:
16201 return ix86_avx_u128_mode_after (mode, insn);
16202 case I387_TRUNC:
16203 case I387_FLOOR:
16204 case I387_CEIL:
16205 case I387_MASK_PM:
16206 return mode;
16207 default:
16208 gcc_unreachable ();
16212 static int
16213 ix86_avx_u128_mode_entry (void)
16215 tree arg;
16217 /* Entry mode is set to AVX_U128_DIRTY if there are
16218 256bit modes used in function arguments. */
16219 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16220 arg = TREE_CHAIN (arg))
16222 rtx incoming = DECL_INCOMING_RTL (arg);
16224 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16225 return AVX_U128_DIRTY;
16228 return AVX_U128_CLEAN;
16231 /* Return a mode that ENTITY is assumed to be
16232 switched to at function entry. */
16235 ix86_mode_entry (int entity)
16237 switch (entity)
16239 case AVX_U128:
16240 return ix86_avx_u128_mode_entry ();
16241 case I387_TRUNC:
16242 case I387_FLOOR:
16243 case I387_CEIL:
16244 case I387_MASK_PM:
16245 return I387_CW_ANY;
16246 default:
16247 gcc_unreachable ();
16251 static int
16252 ix86_avx_u128_mode_exit (void)
16254 rtx reg = crtl->return_rtx;
16256 /* Exit mode is set to AVX_U128_DIRTY if there are
16257 256bit modes used in the function return register. */
16258 if (reg && ix86_check_avx256_register (&reg, NULL))
16259 return AVX_U128_DIRTY;
16261 return AVX_U128_CLEAN;
16264 /* Return a mode that ENTITY is assumed to be
16265 switched to at function exit. */
16268 ix86_mode_exit (int entity)
16270 switch (entity)
16272 case AVX_U128:
16273 return ix86_avx_u128_mode_exit ();
16274 case I387_TRUNC:
16275 case I387_FLOOR:
16276 case I387_CEIL:
16277 case I387_MASK_PM:
16278 return I387_CW_ANY;
16279 default:
16280 gcc_unreachable ();
16284 /* Output code to initialize the control word copies used by the trunc?f?i
16285 and rounding patterns. The current control word is saved to one stack
16286 slot and a copy adjusted for MODE is stored to another. */
16288 static void
16289 emit_i387_cw_initialization (int mode)
16291 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16292 rtx new_mode;
16294 enum ix86_stack_slot slot;
16296 rtx reg = gen_reg_rtx (HImode);
16298 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16299 emit_move_insn (reg, copy_rtx (stored_mode));
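/* Bits 10-11 of the x87 control word select the rounding mode
   (00 = nearest, 01 = down, 10 = up, 11 = truncate) and bit 5 masks the
   precision exception, hence the 0x0c00/0x0400/0x0800/0x0020 constants
   below.  When partial register writes are cheap and we optimize for
   speed, the rounding-control byte is written directly into the high
   byte of the control word instead of masking and ORing the full word.  */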
16301 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16302 || optimize_insn_for_size_p ())
16304 switch (mode)
16306 case I387_CW_TRUNC:
16307 /* round toward zero (truncate) */
16308 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16309 slot = SLOT_CW_TRUNC;
16310 break;
16312 case I387_CW_FLOOR:
16313 /* round down toward -oo */
16314 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16315 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16316 slot = SLOT_CW_FLOOR;
16317 break;
16319 case I387_CW_CEIL:
16320 /* round up toward +oo */
16321 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16322 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16323 slot = SLOT_CW_CEIL;
16324 break;
16326 case I387_CW_MASK_PM:
16327 /* mask precision exception for nearbyint() */
16328 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16329 slot = SLOT_CW_MASK_PM;
16330 break;
16332 default:
16333 gcc_unreachable ();
16336 else
16338 switch (mode)
16340 case I387_CW_TRUNC:
16341 /* round toward zero (truncate) */
16342 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16343 slot = SLOT_CW_TRUNC;
16344 break;
16346 case I387_CW_FLOOR:
16347 /* round down toward -oo */
16348 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16349 slot = SLOT_CW_FLOOR;
16350 break;
16352 case I387_CW_CEIL:
16353 /* round up toward +oo */
16354 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16355 slot = SLOT_CW_CEIL;
16356 break;
16358 case I387_CW_MASK_PM:
16359 /* mask precision exception for nearbyint() */
16360 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16361 slot = SLOT_CW_MASK_PM;
16362 break;
16364 default:
16365 gcc_unreachable ();
16369 gcc_assert (slot < MAX_386_STACK_LOCALS);
16371 new_mode = assign_386_stack_local (HImode, slot);
16372 emit_move_insn (new_mode, reg);
16375 /* Emit vzeroupper. */
16377 void
16378 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16380 int i;
16382 /* Cancel automatic vzeroupper insertion if there are
16383 live call-saved SSE registers at the insertion point. */
16385 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16386 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16387 return;
16389 if (TARGET_64BIT)
16390 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16391 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16392 return;
16394 emit_insn (gen_avx_vzeroupper ());
16397 /* Generate one or more insns to set ENTITY to MODE. */
16399 void
16400 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
16402 switch (entity)
16404 case AVX_U128:
16405 if (mode == AVX_U128_CLEAN)
16406 ix86_avx_emit_vzeroupper (regs_live);
16407 break;
16408 case I387_TRUNC:
16409 case I387_FLOOR:
16410 case I387_CEIL:
16411 case I387_MASK_PM:
16412 if (mode != I387_CW_ANY
16413 && mode != I387_CW_UNINITIALIZED)
16414 emit_i387_cw_initialization (mode);
16415 break;
16416 default:
16417 gcc_unreachable ();
16421 /* Output code for INSN to convert a float to a signed int. OPERANDS
16422 are the insn operands. The output may be [HSD]Imode and the input
16423 operand may be [SDX]Fmode. */
16425 const char *
16426 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16428 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16429 int dimode_p = GET_MODE (operands[0]) == DImode;
16430 int round_mode = get_attr_i387_cw (insn);
16432 /* Jump through a hoop or two for DImode, since the hardware has no
16433 non-popping instruction. We used to do this a different way, but
16434 that was somewhat fragile and broke with post-reload splitters. */
16435 if ((dimode_p || fisttp) && !stack_top_dies)
16436 output_asm_insn ("fld\t%y1", operands);
16438 gcc_assert (STACK_TOP_P (operands[1]));
16439 gcc_assert (MEM_P (operands[0]));
16440 gcc_assert (GET_MODE (operands[1]) != TFmode);
16442 if (fisttp)
16443 output_asm_insn ("fisttp%Z0\t%0", operands);
16444 else
16446 if (round_mode != I387_CW_ANY)
16447 output_asm_insn ("fldcw\t%3", operands);
16448 if (stack_top_dies || dimode_p)
16449 output_asm_insn ("fistp%Z0\t%0", operands);
16450 else
16451 output_asm_insn ("fist%Z0\t%0", operands);
16452 if (round_mode != I387_CW_ANY)
16453 output_asm_insn ("fldcw\t%2", operands);
16456 return "";
16459 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16460 have the values zero or one, indicates the ffreep insn's operand
16461 from the OPERANDS array. */
16463 static const char *
16464 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16466 if (TARGET_USE_FFREEP)
16467 #ifdef HAVE_AS_IX86_FFREEP
16468 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16469 #else
16471 static char retval[32];
16472 int regno = REGNO (operands[opno]);
16474 gcc_assert (STACK_REGNO_P (regno));
16476 regno -= FIRST_STACK_REG;
16478 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16479 return retval;
16481 #endif
16483 return opno ? "fstp\t%y1" : "fstp\t%y0";
16487 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16488 should be used. UNORDERED_P is true when fucom should be used. */
16490 const char *
16491 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16493 int stack_top_dies;
16494 rtx cmp_op0, cmp_op1;
16495 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16497 if (eflags_p)
16499 cmp_op0 = operands[0];
16500 cmp_op1 = operands[1];
16502 else
16504 cmp_op0 = operands[1];
16505 cmp_op1 = operands[2];
16508 if (is_sse)
16510 if (GET_MODE (operands[0]) == SFmode)
16511 if (unordered_p)
16512 return "%vucomiss\t{%1, %0|%0, %1}";
16513 else
16514 return "%vcomiss\t{%1, %0|%0, %1}";
16515 else
16516 if (unordered_p)
16517 return "%vucomisd\t{%1, %0|%0, %1}";
16518 else
16519 return "%vcomisd\t{%1, %0|%0, %1}";
16522 gcc_assert (STACK_TOP_P (cmp_op0));
16524 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16526 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16528 if (stack_top_dies)
16530 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16531 return output_387_ffreep (operands, 1);
16533 else
16534 return "ftst\n\tfnstsw\t%0";
16537 if (STACK_REG_P (cmp_op1)
16538 && stack_top_dies
16539 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16540 && REGNO (cmp_op1) != FIRST_STACK_REG)
16542 /* If the top of the 387 stack dies, and the other operand
16543 is also a stack register that dies, then this must be an
16544 `fcompp' float compare. */
16546 if (eflags_p)
16548 /* There is no double-popping fcomi variant. Fortunately,
16549 eflags is immune from the fstp's cc clobbering. */
16550 if (unordered_p)
16551 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16552 else
16553 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16554 return output_387_ffreep (operands, 0);
16556 else
16558 if (unordered_p)
16559 return "fucompp\n\tfnstsw\t%0";
16560 else
16561 return "fcompp\n\tfnstsw\t%0";
16564 else
16566 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16568 static const char * const alt[16] =
16570 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16571 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16572 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16573 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16575 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16576 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16577 NULL,
16578 NULL,
16580 "fcomi\t{%y1, %0|%0, %y1}",
16581 "fcomip\t{%y1, %0|%0, %y1}",
16582 "fucomi\t{%y1, %0|%0, %y1}",
16583 "fucomip\t{%y1, %0|%0, %y1}",
16585 NULL,
16586 NULL,
16587 NULL,
16588 NULL
16591 int mask;
16592 const char *ret;
16594 mask = eflags_p << 3;
16595 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16596 mask |= unordered_p << 1;
16597 mask |= stack_top_dies;
16599 gcc_assert (mask < 16);
16600 ret = alt[mask];
16601 gcc_assert (ret);
16603 return ret;
16607 void
16608 ix86_output_addr_vec_elt (FILE *file, int value)
16610 const char *directive = ASM_LONG;
16612 #ifdef ASM_QUAD
16613 if (TARGET_LP64)
16614 directive = ASM_QUAD;
16615 #else
16616 gcc_assert (!TARGET_64BIT);
16617 #endif
16619 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16622 void
16623 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16625 const char *directive = ASM_LONG;
16627 #ifdef ASM_QUAD
16628 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16629 directive = ASM_QUAD;
16630 #else
16631 gcc_assert (!TARGET_64BIT);
16632 #endif
16633 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16634 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16635 fprintf (file, "%s%s%d-%s%d\n",
16636 directive, LPREFIX, value, LPREFIX, rel);
16637 else if (HAVE_AS_GOTOFF_IN_DATA)
16638 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16639 #if TARGET_MACHO
16640 else if (TARGET_MACHO)
16642 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16643 machopic_output_function_base_name (file);
16644 putc ('\n', file);
16646 #endif
16647 else
16648 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16649 GOT_SYMBOL_NAME, LPREFIX, value);
16652 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16653 for the target. */
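/* A sketch of the trade-off handled below: "xor reg, reg" has the shorter
   encoding but clobbers EFLAGS, so that form is wrapped in a PARALLEL with
   a CLOBBER of FLAGS_REG; "mov $0, reg" leaves the flags untouched.  */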
16655 void
16656 ix86_expand_clear (rtx dest)
16658 rtx tmp;
16660 /* We play register width games, which are only valid after reload. */
16661 gcc_assert (reload_completed);
16663 /* Avoid HImode and its attendant prefix byte. */
16664 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16665 dest = gen_rtx_REG (SImode, REGNO (dest));
16666 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16668 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16669 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
16671 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16672 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16675 emit_insn (tmp);
16678 /* X is an unchanging MEM. If it is a constant pool reference, return
16679 the constant pool rtx, else NULL. */
16682 maybe_get_pool_constant (rtx x)
16684 x = ix86_delegitimize_address (XEXP (x, 0));
16686 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16687 return get_pool_constant (x);
16689 return NULL_RTX;
16692 void
16693 ix86_expand_move (enum machine_mode mode, rtx operands[])
16695 rtx op0, op1;
16696 enum tls_model model;
16698 op0 = operands[0];
16699 op1 = operands[1];
16701 if (GET_CODE (op1) == SYMBOL_REF)
16703 rtx tmp;
16705 model = SYMBOL_REF_TLS_MODEL (op1);
16706 if (model)
16708 op1 = legitimize_tls_address (op1, model, true);
16709 op1 = force_operand (op1, op0);
16710 if (op1 == op0)
16711 return;
16712 op1 = convert_to_mode (mode, op1, 1);
16714 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16715 op1 = tmp;
16717 else if (GET_CODE (op1) == CONST
16718 && GET_CODE (XEXP (op1, 0)) == PLUS
16719 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16721 rtx addend = XEXP (XEXP (op1, 0), 1);
16722 rtx symbol = XEXP (XEXP (op1, 0), 0);
16723 rtx tmp;
16725 model = SYMBOL_REF_TLS_MODEL (symbol);
16726 if (model)
16727 tmp = legitimize_tls_address (symbol, model, true);
16728 else
16729 tmp = legitimize_pe_coff_symbol (symbol, true);
16731 if (tmp)
16733 tmp = force_operand (tmp, NULL);
16734 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16735 op0, 1, OPTAB_DIRECT);
16736 if (tmp == op0)
16737 return;
16738 op1 = convert_to_mode (mode, tmp, 1);
16742 if ((flag_pic || MACHOPIC_INDIRECT)
16743 && symbolic_operand (op1, mode))
16745 if (TARGET_MACHO && !TARGET_64BIT)
16747 #if TARGET_MACHO
16748 /* dynamic-no-pic */
16749 if (MACHOPIC_INDIRECT)
16751 rtx temp = ((reload_in_progress
16752 || ((op0 && REG_P (op0))
16753 && mode == Pmode))
16754 ? op0 : gen_reg_rtx (Pmode));
16755 op1 = machopic_indirect_data_reference (op1, temp);
16756 if (MACHOPIC_PURE)
16757 op1 = machopic_legitimize_pic_address (op1, mode,
16758 temp == op1 ? 0 : temp);
16760 if (op0 != op1 && GET_CODE (op0) != MEM)
16762 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16763 emit_insn (insn);
16764 return;
16766 if (GET_CODE (op0) == MEM)
16767 op1 = force_reg (Pmode, op1);
16768 else
16770 rtx temp = op0;
16771 if (GET_CODE (temp) != REG)
16772 temp = gen_reg_rtx (Pmode);
16773 temp = legitimize_pic_address (op1, temp);
16774 if (temp == op0)
16775 return;
16776 op1 = temp;
16778 /* dynamic-no-pic */
16779 #endif
16781 else
16783 if (MEM_P (op0))
16784 op1 = force_reg (mode, op1);
16785 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16787 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16788 op1 = legitimize_pic_address (op1, reg);
16789 if (op0 == op1)
16790 return;
16791 op1 = convert_to_mode (mode, op1, 1);
16795 else
16797 if (MEM_P (op0)
16798 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16799 || !push_operand (op0, mode))
16800 && MEM_P (op1))
16801 op1 = force_reg (mode, op1);
16803 if (push_operand (op0, mode)
16804 && ! general_no_elim_operand (op1, mode))
16805 op1 = copy_to_mode_reg (mode, op1);
16807 /* Force large constants in 64bit compilation into a register
16808 to get them CSEed. */
16809 if (can_create_pseudo_p ()
16810 && (mode == DImode) && TARGET_64BIT
16811 && immediate_operand (op1, mode)
16812 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16813 && !register_operand (op0, mode)
16814 && optimize)
16815 op1 = copy_to_mode_reg (mode, op1);
16817 if (can_create_pseudo_p ()
16818 && FLOAT_MODE_P (mode)
16819 && GET_CODE (op1) == CONST_DOUBLE)
16821 /* If we are loading a floating point constant to a register,
16822 force the value to memory now, since we'll get better code
16823 out of the back end. */
16825 op1 = validize_mem (force_const_mem (mode, op1));
16826 if (!register_operand (op0, mode))
16828 rtx temp = gen_reg_rtx (mode);
16829 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16830 emit_move_insn (op0, temp);
16831 return;
16836 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16839 void
16840 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16842 rtx op0 = operands[0], op1 = operands[1];
16843 unsigned int align = GET_MODE_ALIGNMENT (mode);
16845 if (push_operand (op0, VOIDmode))
16846 op0 = emit_move_resolve_push (mode, op0);
16848 /* Force constants other than zero into memory. We do not know how
16849 the instructions used to build constants modify the upper 64 bits
16850 of the register; once we have that information, we may be able
16851 to handle some of them more efficiently. */
16852 if (can_create_pseudo_p ()
16853 && register_operand (op0, mode)
16854 && (CONSTANT_P (op1)
16855 || (GET_CODE (op1) == SUBREG
16856 && CONSTANT_P (SUBREG_REG (op1))))
16857 && !standard_sse_constant_p (op1))
16858 op1 = validize_mem (force_const_mem (mode, op1));
16860 /* We need to check memory alignment for SSE mode since an attribute
16861 can make operands unaligned. */
16862 if (can_create_pseudo_p ()
16863 && SSE_REG_MODE_P (mode)
16864 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16865 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16867 rtx tmp[2];
16869 /* ix86_expand_vector_move_misalign() does not like constants ... */
16870 if (CONSTANT_P (op1)
16871 || (GET_CODE (op1) == SUBREG
16872 && CONSTANT_P (SUBREG_REG (op1))))
16873 op1 = validize_mem (force_const_mem (mode, op1));
16875 /* ... nor both arguments in memory. */
16876 if (!register_operand (op0, mode)
16877 && !register_operand (op1, mode))
16878 op1 = force_reg (mode, op1);
16880 tmp[0] = op0; tmp[1] = op1;
16881 ix86_expand_vector_move_misalign (mode, tmp);
16882 return;
16885 /* Make operand1 a register if it isn't already. */
16886 if (can_create_pseudo_p ()
16887 && !register_operand (op0, mode)
16888 && !register_operand (op1, mode))
16890 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16891 return;
16894 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16897 /* Split 32-byte AVX unaligned load and store if needed. */
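/* Illustrative shape of the split performed below: a misaligned 256-bit
   access is handled as two 128-bit halves, e.g. a V4DF load becomes two
   V2DF loads from mem and mem+16, and a store extracts each 128-bit half
   with vextractf128, when the corresponding
   TARGET_AVX256_SPLIT_UNALIGNED_{LOAD,STORE} tuning flag is set.  */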
16899 static void
16900 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16902 rtx m;
16903 rtx (*extract) (rtx, rtx, rtx);
16904 rtx (*load_unaligned) (rtx, rtx);
16905 rtx (*store_unaligned) (rtx, rtx);
16906 enum machine_mode mode;
16908 switch (GET_MODE (op0))
16910 default:
16911 gcc_unreachable ();
16912 case V32QImode:
16913 extract = gen_avx_vextractf128v32qi;
16914 load_unaligned = gen_avx_loaddquv32qi;
16915 store_unaligned = gen_avx_storedquv32qi;
16916 mode = V16QImode;
16917 break;
16918 case V8SFmode:
16919 extract = gen_avx_vextractf128v8sf;
16920 load_unaligned = gen_avx_loadups256;
16921 store_unaligned = gen_avx_storeups256;
16922 mode = V4SFmode;
16923 break;
16924 case V4DFmode:
16925 extract = gen_avx_vextractf128v4df;
16926 load_unaligned = gen_avx_loadupd256;
16927 store_unaligned = gen_avx_storeupd256;
16928 mode = V2DFmode;
16929 break;
16932 if (MEM_P (op1))
16934 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16936 rtx r = gen_reg_rtx (mode);
16937 m = adjust_address (op1, mode, 0);
16938 emit_move_insn (r, m);
16939 m = adjust_address (op1, mode, 16);
16940 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16941 emit_move_insn (op0, r);
16943 /* Normal *mov<mode>_internal pattern will handle
16944 unaligned loads just fine if misaligned_operand
16945 is true, and without the UNSPEC it can be combined
16946 with arithmetic instructions. */
16947 else if (misaligned_operand (op1, GET_MODE (op1)))
16948 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16949 else
16950 emit_insn (load_unaligned (op0, op1));
16952 else if (MEM_P (op0))
16954 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16956 m = adjust_address (op0, mode, 0);
16957 emit_insn (extract (m, op1, const0_rtx));
16958 m = adjust_address (op0, mode, 16);
16959 emit_insn (extract (m, op1, const1_rtx));
16961 else
16962 emit_insn (store_unaligned (op0, op1));
16964 else
16965 gcc_unreachable ();
16968 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16969 straight to ix86_expand_vector_move. */
16970 /* Code generation for scalar reg-reg moves of single and double precision data:
16971 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16972 movaps reg, reg
16973 else
16974 movss reg, reg
16975 if (x86_sse_partial_reg_dependency == true)
16976 movapd reg, reg
16977 else
16978 movsd reg, reg
16980 Code generation for scalar loads of double precision data:
16981 if (x86_sse_split_regs == true)
16982 movlpd mem, reg (gas syntax)
16983 else
16984 movsd mem, reg
16986 Code generation for unaligned packed loads of single precision data
16987 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16988 if (x86_sse_unaligned_move_optimal)
16989 movups mem, reg
16991 if (x86_sse_partial_reg_dependency == true)
16993 xorps reg, reg
16994 movlps mem, reg
16995 movhps mem+8, reg
16997 else
16999 movlps mem, reg
17000 movhps mem+8, reg
17003 Code generation for unaligned packed loads of double precision data
17004 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17005 if (x86_sse_unaligned_move_optimal)
17006 movupd mem, reg
17008 if (x86_sse_split_regs == true)
17010 movlpd mem, reg
17011 movhpd mem+8, reg
17013 else
17015 movsd mem, reg
17016 movhpd mem+8, reg
17020 void
17021 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17023 rtx op0, op1, orig_op0 = NULL_RTX, m;
17024 rtx (*load_unaligned) (rtx, rtx);
17025 rtx (*store_unaligned) (rtx, rtx);
17027 op0 = operands[0];
17028 op1 = operands[1];
17030 if (GET_MODE_SIZE (mode) == 64)
17032 switch (GET_MODE_CLASS (mode))
17034 case MODE_VECTOR_INT:
17035 case MODE_INT:
17036 if (GET_MODE (op0) != V16SImode)
17038 if (!MEM_P (op0))
17040 orig_op0 = op0;
17041 op0 = gen_reg_rtx (V16SImode);
17043 else
17044 op0 = gen_lowpart (V16SImode, op0);
17046 op1 = gen_lowpart (V16SImode, op1);
17047 /* FALLTHRU */
17049 case MODE_VECTOR_FLOAT:
17050 switch (GET_MODE (op0))
17052 default:
17053 gcc_unreachable ();
17054 case V16SImode:
17055 load_unaligned = gen_avx512f_loaddquv16si;
17056 store_unaligned = gen_avx512f_storedquv16si;
17057 break;
17058 case V16SFmode:
17059 load_unaligned = gen_avx512f_loadups512;
17060 store_unaligned = gen_avx512f_storeups512;
17061 break;
17062 case V8DFmode:
17063 load_unaligned = gen_avx512f_loadupd512;
17064 store_unaligned = gen_avx512f_storeupd512;
17065 break;
17068 if (MEM_P (op1))
17069 emit_insn (load_unaligned (op0, op1));
17070 else if (MEM_P (op0))
17071 emit_insn (store_unaligned (op0, op1));
17072 else
17073 gcc_unreachable ();
17074 if (orig_op0)
17075 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17076 break;
17078 default:
17079 gcc_unreachable ();
17082 return;
17085 if (TARGET_AVX
17086 && GET_MODE_SIZE (mode) == 32)
17088 switch (GET_MODE_CLASS (mode))
17090 case MODE_VECTOR_INT:
17091 case MODE_INT:
17092 if (GET_MODE (op0) != V32QImode)
17094 if (!MEM_P (op0))
17096 orig_op0 = op0;
17097 op0 = gen_reg_rtx (V32QImode);
17099 else
17100 op0 = gen_lowpart (V32QImode, op0);
17102 op1 = gen_lowpart (V32QImode, op1);
17103 /* FALLTHRU */
17105 case MODE_VECTOR_FLOAT:
17106 ix86_avx256_split_vector_move_misalign (op0, op1);
17107 if (orig_op0)
17108 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17109 break;
17111 default:
17112 gcc_unreachable ();
17115 return;
17118 if (MEM_P (op1))
17120 /* Normal *mov<mode>_internal pattern will handle
17121 unaligned loads just fine if misaligned_operand
17122 is true, and without the UNSPEC it can be combined
17123 with arithmetic instructions. */
17124 if (TARGET_AVX
17125 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17126 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17127 && misaligned_operand (op1, GET_MODE (op1)))
17128 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17129 /* ??? If we have typed data, then it would appear that using
17130 movdqu is the only way to get unaligned data loaded with
17131 integer type. */
17132 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17134 if (GET_MODE (op0) != V16QImode)
17136 orig_op0 = op0;
17137 op0 = gen_reg_rtx (V16QImode);
17139 op1 = gen_lowpart (V16QImode, op1);
17140 /* We will eventually emit movups based on insn attributes. */
17141 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17142 if (orig_op0)
17143 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17145 else if (TARGET_SSE2 && mode == V2DFmode)
17147 rtx zero;
17149 if (TARGET_AVX
17150 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17151 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17152 || optimize_insn_for_size_p ())
17154 /* We will eventually emit movups based on insn attributes. */
17155 emit_insn (gen_sse2_loadupd (op0, op1));
17156 return;
17159 /* When SSE registers are split into halves, we can avoid
17160 writing to the top half twice. */
17161 if (TARGET_SSE_SPLIT_REGS)
17163 emit_clobber (op0);
17164 zero = op0;
17166 else
17168 /* ??? Not sure about the best option for the Intel chips.
17169 The following would seem to satisfy; the register is
17170 entirely cleared, breaking the dependency chain. We
17171 then store to the upper half, with a dependency depth
17172 of one. A rumor has it that Intel recommends two movsd
17173 followed by an unpacklpd, but this is unconfirmed. And
17174 given that the dependency depth of the unpacklpd would
17175 still be one, I'm not sure why this would be better. */
17176 zero = CONST0_RTX (V2DFmode);
17179 m = adjust_address (op1, DFmode, 0);
17180 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17181 m = adjust_address (op1, DFmode, 8);
17182 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17184 else
17186 rtx t;
17188 if (TARGET_AVX
17189 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17190 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17191 || optimize_insn_for_size_p ())
17193 if (GET_MODE (op0) != V4SFmode)
17195 orig_op0 = op0;
17196 op0 = gen_reg_rtx (V4SFmode);
17198 op1 = gen_lowpart (V4SFmode, op1);
17199 emit_insn (gen_sse_loadups (op0, op1));
17200 if (orig_op0)
17201 emit_move_insn (orig_op0,
17202 gen_lowpart (GET_MODE (orig_op0), op0));
17203 return;
17206 if (mode != V4SFmode)
17207 t = gen_reg_rtx (V4SFmode);
17208 else
17209 t = op0;
17211 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17212 emit_move_insn (t, CONST0_RTX (V4SFmode));
17213 else
17214 emit_clobber (t);
17216 m = adjust_address (op1, V2SFmode, 0);
17217 emit_insn (gen_sse_loadlps (t, t, m));
17218 m = adjust_address (op1, V2SFmode, 8);
17219 emit_insn (gen_sse_loadhps (t, t, m));
17220 if (mode != V4SFmode)
17221 emit_move_insn (op0, gen_lowpart (mode, t));
17224 else if (MEM_P (op0))
17226 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17228 op0 = gen_lowpart (V16QImode, op0);
17229 op1 = gen_lowpart (V16QImode, op1);
17230 /* We will eventually emit movups based on insn attributes. */
17231 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17233 else if (TARGET_SSE2 && mode == V2DFmode)
17235 if (TARGET_AVX
17236 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17237 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17238 || optimize_insn_for_size_p ())
17239 /* We will eventually emit movups based on insn attributes. */
17240 emit_insn (gen_sse2_storeupd (op0, op1));
17241 else
17243 m = adjust_address (op0, DFmode, 0);
17244 emit_insn (gen_sse2_storelpd (m, op1));
17245 m = adjust_address (op0, DFmode, 8);
17246 emit_insn (gen_sse2_storehpd (m, op1));
17249 else
17251 if (mode != V4SFmode)
17252 op1 = gen_lowpart (V4SFmode, op1);
17254 if (TARGET_AVX
17255 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17256 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17257 || optimize_insn_for_size_p ())
17259 op0 = gen_lowpart (V4SFmode, op0);
17260 emit_insn (gen_sse_storeups (op0, op1));
17262 else
17264 m = adjust_address (op0, V2SFmode, 0);
17265 emit_insn (gen_sse_storelps (m, op1));
17266 m = adjust_address (op0, V2SFmode, 8);
17267 emit_insn (gen_sse_storehps (m, op1));
17271 else
17272 gcc_unreachable ();
17275 /* Helper function of ix86_fixup_binary_operands to canonicalize
17276 operand order. Returns true if the operands should be swapped. */
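/* For instance (illustrative): for a commutative PLUS where operands[0]
   and operands[2] are the same register, swapping src1 and src2 lets the
   two-address "dst = dst + src" form match, per the priorities below.  */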
17278 static bool
17279 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17280 rtx operands[])
17282 rtx dst = operands[0];
17283 rtx src1 = operands[1];
17284 rtx src2 = operands[2];
17286 /* If the operation is not commutative, we can't do anything. */
17287 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17288 return false;
17290 /* Highest priority is that src1 should match dst. */
17291 if (rtx_equal_p (dst, src1))
17292 return false;
17293 if (rtx_equal_p (dst, src2))
17294 return true;
17296 /* Next highest priority is that immediate constants come second. */
17297 if (immediate_operand (src2, mode))
17298 return false;
17299 if (immediate_operand (src1, mode))
17300 return true;
17302 /* Lowest priority is that memory references should come second. */
17303 if (MEM_P (src2))
17304 return false;
17305 if (MEM_P (src1))
17306 return true;
17308 return false;
17312 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17313 destination to use for the operation. If different from the true
17314 destination in operands[0], a copy operation will be required. */
17317 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17318 rtx operands[])
17320 rtx dst = operands[0];
17321 rtx src1 = operands[1];
17322 rtx src2 = operands[2];
17324 /* Canonicalize operand order. */
17325 if (ix86_swap_binary_operands_p (code, mode, operands))
17327 rtx temp;
17329 /* It is invalid to swap operands of different modes. */
17330 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17332 temp = src1;
17333 src1 = src2;
17334 src2 = temp;
17337 /* Both source operands cannot be in memory. */
17338 if (MEM_P (src1) && MEM_P (src2))
17340 /* Optimization: Only read from memory once. */
17341 if (rtx_equal_p (src1, src2))
17343 src2 = force_reg (mode, src2);
17344 src1 = src2;
17346 else if (rtx_equal_p (dst, src1))
17347 src2 = force_reg (mode, src2);
17348 else
17349 src1 = force_reg (mode, src1);
17352 /* If the destination is memory, and we do not have matching source
17353 operands, do things in registers. */
17354 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17355 dst = gen_reg_rtx (mode);
17357 /* Source 1 cannot be a constant. */
17358 if (CONSTANT_P (src1))
17359 src1 = force_reg (mode, src1);
17361 /* Source 1 cannot be a non-matching memory. */
17362 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17363 src1 = force_reg (mode, src1);
17365 /* Improve address combine. */
17366 if (code == PLUS
17367 && GET_MODE_CLASS (mode) == MODE_INT
17368 && MEM_P (src2))
17369 src2 = force_reg (mode, src2);
17371 operands[1] = src1;
17372 operands[2] = src2;
17373 return dst;
17376 /* Similarly, but assume that the destination has already been
17377 set up properly. */
17379 void
17380 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17381 enum machine_mode mode, rtx operands[])
17383 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17384 gcc_assert (dst == operands[0]);
17387 /* Attempt to expand a binary operator. Make the expansion closer to the
17388 actual machine than just general_operand, which would allow 3 separate
17389 memory references (one output, two input) in a single insn. */
17391 void
17392 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17393 rtx operands[])
17395 rtx src1, src2, dst, op, clob;
17397 dst = ix86_fixup_binary_operands (code, mode, operands);
17398 src1 = operands[1];
17399 src2 = operands[2];
17401 /* Emit the instruction. */
17403 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17404 if (reload_in_progress)
17406 /* Reload doesn't know about the flags register, and doesn't know that
17407 it doesn't want to clobber it. We can only do this with PLUS. */
17408 gcc_assert (code == PLUS);
17409 emit_insn (op);
17411 else if (reload_completed
17412 && code == PLUS
17413 && !rtx_equal_p (dst, src1))
17415 /* This is going to be an LEA; avoid splitting it later. */
17416 emit_insn (op);
17418 else
17420 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17421 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17424 /* Fix up the destination if needed. */
17425 if (dst != operands[0])
17426 emit_move_insn (operands[0], dst);
17429 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17430 the given OPERANDS. */
17432 void
17433 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17434 rtx operands[])
17436 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17437 if (GET_CODE (operands[1]) == SUBREG)
17439 op1 = operands[1];
17440 op2 = operands[2];
17442 else if (GET_CODE (operands[2]) == SUBREG)
17444 op1 = operands[2];
17445 op2 = operands[1];
17447 /* Optimize (__m128i) d | (__m128i) e and similar code
17448 when d and e are float vectors into a float vector logical
17449 insn. In C/C++, without using intrinsics there is no other way
17450 to express a vector logical operation on float vectors than
17451 to cast them temporarily to integer vectors. */
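/* A hypothetical C fragment of the situation described above (the
   variable names are illustrative only):
     __m128 a, b;
     __m128i x = (__m128i) a | (__m128i) b;
   With the transformation below, the IOR is emitted in the float vector
   mode rather than in the integer vector mode.  */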
17452 if (op1
17453 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17454 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17455 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17456 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17457 && SUBREG_BYTE (op1) == 0
17458 && (GET_CODE (op2) == CONST_VECTOR
17459 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17460 && SUBREG_BYTE (op2) == 0))
17461 && can_create_pseudo_p ())
17463 rtx dst;
17464 switch (GET_MODE (SUBREG_REG (op1)))
17466 case V4SFmode:
17467 case V8SFmode:
17468 case V2DFmode:
17469 case V4DFmode:
17470 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17471 if (GET_CODE (op2) == CONST_VECTOR)
17473 op2 = gen_lowpart (GET_MODE (dst), op2);
17474 op2 = force_reg (GET_MODE (dst), op2);
17476 else
17478 op1 = operands[1];
17479 op2 = SUBREG_REG (operands[2]);
17480 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17481 op2 = force_reg (GET_MODE (dst), op2);
17483 op1 = SUBREG_REG (op1);
17484 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17485 op1 = force_reg (GET_MODE (dst), op1);
17486 emit_insn (gen_rtx_SET (VOIDmode, dst,
17487 gen_rtx_fmt_ee (code, GET_MODE (dst),
17488 op1, op2)));
17489 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17490 return;
17491 default:
17492 break;
17495 if (!nonimmediate_operand (operands[1], mode))
17496 operands[1] = force_reg (mode, operands[1]);
17497 if (!nonimmediate_operand (operands[2], mode))
17498 operands[2] = force_reg (mode, operands[2]);
17499 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17500 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17501 gen_rtx_fmt_ee (code, mode, operands[1],
17502 operands[2])));
17505 /* Return TRUE or FALSE depending on whether the binary operator meets the
17506 appropriate constraints. */
17508 bool
17509 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17510 rtx operands[3])
17512 rtx dst = operands[0];
17513 rtx src1 = operands[1];
17514 rtx src2 = operands[2];
17516 /* Both source operands cannot be in memory. */
17517 if (MEM_P (src1) && MEM_P (src2))
17518 return false;
17520 /* Canonicalize operand order for commutative operators. */
17521 if (ix86_swap_binary_operands_p (code, mode, operands))
17523 rtx temp = src1;
17524 src1 = src2;
17525 src2 = temp;
17528 /* If the destination is memory, we must have a matching source operand. */
17529 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17530 return false;
17532 /* Source 1 cannot be a constant. */
17533 if (CONSTANT_P (src1))
17534 return false;
17536 /* Source 1 cannot be a non-matching memory. */
17537 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17538 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17539 return (code == AND
17540 && (mode == HImode
17541 || mode == SImode
17542 || (TARGET_64BIT && mode == DImode))
17543 && satisfies_constraint_L (src2));
17545 return true;
17548 /* Attempt to expand a unary operator. Make the expansion closer to the
17549 actual machine than just general_operand, which would allow 2 separate
17550 memory references (one output, one input) in a single insn. */
17552 void
17553 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17554 rtx operands[])
17556 int matching_memory;
17557 rtx src, dst, op, clob;
17559 dst = operands[0];
17560 src = operands[1];
17562 /* If the destination is memory, and we do not have matching source
17563 operands, do things in registers. */
17564 matching_memory = 0;
17565 if (MEM_P (dst))
17567 if (rtx_equal_p (dst, src))
17568 matching_memory = 1;
17569 else
17570 dst = gen_reg_rtx (mode);
17573 /* When source operand is memory, destination must match. */
17574 if (MEM_P (src) && !matching_memory)
17575 src = force_reg (mode, src);
17577 /* Emit the instruction. */
17579 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17580 if (reload_in_progress || code == NOT)
17582 /* Reload doesn't know about the flags register, and doesn't know that
17583 it doesn't want to clobber it. */
17584 gcc_assert (code == NOT);
17585 emit_insn (op);
17587 else
17589 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17590 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17593 /* Fix up the destination if needed. */
17594 if (dst != operands[0])
17595 emit_move_insn (operands[0], dst);
17598 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17599 divisor are within the range [0-255]. */
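/* Rough control flow of the expansion below: OR the dividend and divisor
   into a scratch register and test it against -0x100 to see whether any
   bit above bit 7 is set.  If none is, branch to qimode_label and use a
   single 8bit unsigned divide (udivmodhiqi3), taking the remainder from
   AH and zero-extending the quotient from AL; otherwise fall through to
   the full-width divide and jump past the 8bit path to end_label.  */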
17601 void
17602 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17603 bool signed_p)
17605 rtx end_label, qimode_label;
17606 rtx insn, div, mod;
17607 rtx scratch, tmp0, tmp1, tmp2;
17608 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17609 rtx (*gen_zero_extend) (rtx, rtx);
17610 rtx (*gen_test_ccno_1) (rtx, rtx);
17612 switch (mode)
17614 case SImode:
17615 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17616 gen_test_ccno_1 = gen_testsi_ccno_1;
17617 gen_zero_extend = gen_zero_extendqisi2;
17618 break;
17619 case DImode:
17620 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17621 gen_test_ccno_1 = gen_testdi_ccno_1;
17622 gen_zero_extend = gen_zero_extendqidi2;
17623 break;
17624 default:
17625 gcc_unreachable ();
17628 end_label = gen_label_rtx ();
17629 qimode_label = gen_label_rtx ();
17631 scratch = gen_reg_rtx (mode);
17633 /* Use 8bit unsigned divmod if dividend and divisor are within
17634 the range [0-255]. */
17635 emit_move_insn (scratch, operands[2]);
17636 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17637 scratch, 1, OPTAB_DIRECT);
17638 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17639 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17640 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17641 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17642 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17643 pc_rtx);
17644 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17645 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17646 JUMP_LABEL (insn) = qimode_label;
17648 /* Generate original signed/unsigned divmod. */
17649 div = gen_divmod4_1 (operands[0], operands[1],
17650 operands[2], operands[3]);
17651 emit_insn (div);
17653 /* Branch to the end. */
17654 emit_jump_insn (gen_jump (end_label));
17655 emit_barrier ();
17657 /* Generate 8bit unsigned divide. */
17658 emit_label (qimode_label);
17659 /* Don't use operands[0] for result of 8bit divide since not all
17660 registers support QImode ZERO_EXTRACT. */
17661 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17662 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17663 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17664 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17666 if (signed_p)
17668 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17669 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17671 else
17673 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17674 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17677 /* Extract remainder from AH. */
17678 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17679 if (REG_P (operands[1]))
17680 insn = emit_move_insn (operands[1], tmp1);
17681 else
17683 /* Need a new scratch register since the old one holds the result
17684 of the 8bit divide. */
17685 scratch = gen_reg_rtx (mode);
17686 emit_move_insn (scratch, tmp1);
17687 insn = emit_move_insn (operands[1], scratch);
17689 set_unique_reg_note (insn, REG_EQUAL, mod);
17691 /* Zero extend quotient from AL. */
17692 tmp1 = gen_lowpart (QImode, tmp0);
17693 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17694 set_unique_reg_note (insn, REG_EQUAL, div);
17696 emit_label (end_label);
17699 /* Whether it is OK to emit CFI directives when emitting asm code. */
17701 bool
17702 ix86_emit_cfi ()
17704 return dwarf2out_do_cfi_asm ();
17707 #define LEA_MAX_STALL (3)
17708 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17710 /* Increase given DISTANCE in half-cycles according to
17711 dependencies between PREV and NEXT instructions.
17712 Add 1 half-cycle if there is no dependency and
17713 go to the next cycle if there is a dependency. */
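/* For example (purely illustrative): with DISTANCE == 3, an independent
   NEXT adds one half-cycle (result 4), while a true dependence rounds up
   to the next full cycle and adds one more, giving 3 + (3 & 1) + 2 == 6.  */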
17715 static unsigned int
17716 increase_distance (rtx prev, rtx next, unsigned int distance)
17718 df_ref *use_rec;
17719 df_ref *def_rec;
17721 if (!prev || !next)
17722 return distance + (distance & 1) + 2;
17724 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17725 return distance + 1;
17727 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
17728 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
17729 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
17730 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
17731 return distance + (distance & 1) + 2;
17733 return distance + 1;
17736 /* Function checks if instruction INSN defines register number
17737 REGNO1 or REGNO2. */
17739 static bool
17740 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17741 rtx insn)
17743 df_ref *def_rec;
17745 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
17746 if (DF_REF_REG_DEF_P (*def_rec)
17747 && !DF_REF_IS_ARTIFICIAL (*def_rec)
17748 && (regno1 == DF_REF_REGNO (*def_rec)
17749 || regno2 == DF_REF_REGNO (*def_rec)))
17751 return true;
17754 return false;
17757 /* Function checks if instruction INSN uses register number
17758 REGNO as a part of address expression. */
17760 static bool
17761 insn_uses_reg_mem (unsigned int regno, rtx insn)
17763 df_ref *use_rec;
17765 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
17766 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
17767 return true;
17769 return false;
17772 /* Search backward for non-agu definition of register number REGNO1
17773 or register number REGNO2 in basic block starting from instruction
17774 START up to head of basic block or instruction INSN.
17776 Function puts true value into *FOUND var if definition was found
17777 and false otherwise.
17779 Distance in half-cycles between START and found instruction or head
17780 of BB is added to DISTANCE and returned. */
17782 static int
17783 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17784 rtx insn, int distance,
17785 rtx start, bool *found)
17787 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17788 rtx prev = start;
17789 rtx next = NULL;
17791 *found = false;
17793 while (prev
17794 && prev != insn
17795 && distance < LEA_SEARCH_THRESHOLD)
17797 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17799 distance = increase_distance (prev, next, distance);
17800 if (insn_defines_reg (regno1, regno2, prev))
17802 if (recog_memoized (prev) < 0
17803 || get_attr_type (prev) != TYPE_LEA)
17805 *found = true;
17806 return distance;
17810 next = prev;
17812 if (prev == BB_HEAD (bb))
17813 break;
17815 prev = PREV_INSN (prev);
17818 return distance;
17821 /* Search backward for non-agu definition of register number REGNO1
17822 or register number REGNO2 in INSN's basic block until
17823 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17824 2. Reach neighbour BBs boundary, or
17825 3. Reach agu definition.
17826 Returns the distance between the non-agu definition point and INSN.
17827 If no definition point, returns -1. */
17829 static int
17830 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17831 rtx insn)
17833 basic_block bb = BLOCK_FOR_INSN (insn);
17834 int distance = 0;
17835 bool found = false;
17837 if (insn != BB_HEAD (bb))
17838 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17839 distance, PREV_INSN (insn),
17840 &found);
17842 if (!found && distance < LEA_SEARCH_THRESHOLD)
17844 edge e;
17845 edge_iterator ei;
17846 bool simple_loop = false;
17848 FOR_EACH_EDGE (e, ei, bb->preds)
17849 if (e->src == bb)
17851 simple_loop = true;
17852 break;
17855 if (simple_loop)
17856 distance = distance_non_agu_define_in_bb (regno1, regno2,
17857 insn, distance,
17858 BB_END (bb), &found);
17859 else
17861 int shortest_dist = -1;
17862 bool found_in_bb = false;
17864 FOR_EACH_EDGE (e, ei, bb->preds)
17866 int bb_dist
17867 = distance_non_agu_define_in_bb (regno1, regno2,
17868 insn, distance,
17869 BB_END (e->src),
17870 &found_in_bb);
17871 if (found_in_bb)
17873 if (shortest_dist < 0)
17874 shortest_dist = bb_dist;
17875 else if (bb_dist > 0)
17876 shortest_dist = MIN (bb_dist, shortest_dist);
17878 found = true;
17882 distance = shortest_dist;
17886 /* get_attr_type may modify recog data. We want to make sure
17887 that recog data is valid for instruction INSN, on which
17888 distance_non_agu_define is called. INSN is unchanged here. */
17889 extract_insn_cached (insn);
17891 if (!found)
17892 return -1;
17894 return distance >> 1;
17897 /* Return the distance in half-cycles between INSN and the next
17898 insn that uses register number REGNO in a memory address, added
17899 to DISTANCE. Return -1 if REGNO is set.
17901 Put true value into *FOUND if register usage was found and
17902 false otherwise.
17903 Put true value into *REDEFINED if register redefinition was
17904 found and false otherwise. */
17906 static int
17907 distance_agu_use_in_bb (unsigned int regno,
17908 rtx insn, int distance, rtx start,
17909 bool *found, bool *redefined)
17911 basic_block bb = NULL;
17912 rtx next = start;
17913 rtx prev = NULL;
17915 *found = false;
17916 *redefined = false;
17918 if (start != NULL_RTX)
17920 bb = BLOCK_FOR_INSN (start);
17921 if (start != BB_HEAD (bb))
17922 /* If insn and start belong to the same bb, set prev to insn,
17923 so the call to increase_distance will increase the distance
17924 between insns by 1. */
17925 prev = insn;
17928 while (next
17929 && next != insn
17930 && distance < LEA_SEARCH_THRESHOLD)
17932 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17934 distance = increase_distance(prev, next, distance);
17935 if (insn_uses_reg_mem (regno, next))
17937 /* Return DISTANCE if OP0 is used in memory
17938 address in NEXT. */
17939 *found = true;
17940 return distance;
17943 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17945 /* Return -1 if OP0 is set in NEXT. */
17946 *redefined = true;
17947 return -1;
17950 prev = next;
17953 if (next == BB_END (bb))
17954 break;
17956 next = NEXT_INSN (next);
17959 return distance;
17962 /* Return the distance between INSN and the next insn that uses
17963 register number REGNO0 in a memory address. Return -1 if no such
17964 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17966 static int
17967 distance_agu_use (unsigned int regno0, rtx insn)
17969 basic_block bb = BLOCK_FOR_INSN (insn);
17970 int distance = 0;
17971 bool found = false;
17972 bool redefined = false;
17974 if (insn != BB_END (bb))
17975 distance = distance_agu_use_in_bb (regno0, insn, distance,
17976 NEXT_INSN (insn),
17977 &found, &redefined);
17979 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
17981 edge e;
17982 edge_iterator ei;
17983 bool simple_loop = false;
17985 FOR_EACH_EDGE (e, ei, bb->succs)
17986 if (e->dest == bb)
17988 simple_loop = true;
17989 break;
17992 if (simple_loop)
17993 distance = distance_agu_use_in_bb (regno0, insn,
17994 distance, BB_HEAD (bb),
17995 &found, &redefined);
17996 else
17998 int shortest_dist = -1;
17999 bool found_in_bb = false;
18000 bool redefined_in_bb = false;
18002 FOR_EACH_EDGE (e, ei, bb->succs)
18004 int bb_dist
18005 = distance_agu_use_in_bb (regno0, insn,
18006 distance, BB_HEAD (e->dest),
18007 &found_in_bb, &redefined_in_bb);
18008 if (found_in_bb)
18010 if (shortest_dist < 0)
18011 shortest_dist = bb_dist;
18012 else if (bb_dist > 0)
18013 shortest_dist = MIN (bb_dist, shortest_dist);
18015 found = true;
18019 distance = shortest_dist;
18023 if (!found || redefined)
18024 return -1;
18026 return distance >> 1;
18029 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18030 there is a dilemma of choosing LEA or ADD:
18031 Negative value: ADD is preferred over LEA
18032 Zero: Neutral
18033 Positive value: LEA is preferred over ADD. */
18034 #define IX86_LEA_PRIORITY 0
18036 /* Return true if usage of lea INSN has a performance advantage
18037 over a sequence of instructions. The instruction sequence has
18038 SPLIT_COST cycles higher latency than the lea latency. */
18040 static bool
18041 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18042 unsigned int regno2, int split_cost, bool has_scale)
18044 int dist_define, dist_use;
18046 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18047 non-destructive destination, or because the ability to use
18048 SCALE is wanted, the use of LEA is justified. */
18049 if (TARGET_SILVERMONT || TARGET_INTEL)
18051 if (has_scale)
18052 return true;
18053 if (split_cost < 1)
18054 return false;
18055 if (regno0 == regno1 || regno0 == regno2)
18056 return false;
18057 return true;
18060 dist_define = distance_non_agu_define (regno1, regno2, insn);
18061 dist_use = distance_agu_use (regno0, insn);
18063 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18065 /* If there is no non-AGU operand definition, no AGU
18066 operand usage, and the split cost is 0, then both the lea
18067 and non-lea variants have the same priority. Currently
18068 we prefer lea for 64-bit code and non-lea for 32-bit
18069 code. */
18070 if (dist_use < 0 && split_cost == 0)
18071 return TARGET_64BIT || IX86_LEA_PRIORITY;
18072 else
18073 return true;
18076 /* With a longer definition distance, lea is preferable.
18077 Here we adjust it to take the splitting cost and
18078 lea priority into account. */
18079 dist_define += split_cost + IX86_LEA_PRIORITY;
18081 /* If there is no use in a memory address, then we just check
18082 that the split cost exceeds the AGU stall. */
18083 if (dist_use < 0)
18084 return dist_define > LEA_MAX_STALL;
18086 /* If this insn has both backward non-agu dependence and forward
18087 agu dependence, the one with the shorter distance takes effect. */
18088 return dist_define >= dist_use;
18091 /* Return true if it is legal to clobber flags by INSN and
18092 false otherwise. */
18094 static bool
18095 ix86_ok_to_clobber_flags (rtx insn)
18097 basic_block bb = BLOCK_FOR_INSN (insn);
18098 df_ref *use;
18099 bitmap live;
18101 while (insn)
18103 if (NONDEBUG_INSN_P (insn))
18105 for (use = DF_INSN_USES (insn); *use; use++)
18106 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
18107 return false;
18109 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18110 return true;
18113 if (insn == BB_END (bb))
18114 break;
18116 insn = NEXT_INSN (insn);
18119 live = df_get_live_out(bb);
18120 return !REGNO_REG_SET_P (live, FLAGS_REG);
18123 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18124 move and add to avoid AGU stalls. */
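/* Illustrative split (AT&T syntax, hypothetical registers): instead of
     lea (%rsi,%rdi), %rax
   emit
     mov %rsi, %rax
     add %rdi, %rax
   which is what the split_cost of 1 passed to ix86_lea_outperforms
   below accounts for.  */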
18126 bool
18127 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18129 unsigned int regno0, regno1, regno2;
18131 /* Check if we need to optimize. */
18132 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18133 return false;
18135 /* Check it is correct to split here. */
18136 if (!ix86_ok_to_clobber_flags(insn))
18137 return false;
18139 regno0 = true_regnum (operands[0]);
18140 regno1 = true_regnum (operands[1]);
18141 regno2 = true_regnum (operands[2]);
18143 /* We need to split only adds with a non-destructive
18144 destination operand. */
18145 if (regno0 == regno1 || regno0 == regno2)
18146 return false;
18147 else
18148 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18151 /* Return true if we should emit lea instruction instead of mov
18152 instruction. */
18154 bool
18155 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18157 unsigned int regno0, regno1;
18159 /* Check if we need to optimize. */
18160 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18161 return false;
18163 /* Use lea for reg to reg moves only. */
18164 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18165 return false;
18167 regno0 = true_regnum (operands[0]);
18168 regno1 = true_regnum (operands[1]);
18170 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18173 /* Return true if we need to split lea into a sequence of
18174 instructions to avoid AGU stalls. */
18176 bool
18177 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18179 unsigned int regno0, regno1, regno2;
18180 int split_cost;
18181 struct ix86_address parts;
18182 int ok;
18184 /* Check we need to optimize. */
18185 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18186 return false;
18188 /* The "at least two components" test below might not catch simple
18189 move or zero extension insns if parts.base is non-NULL and parts.disp
18190 is const0_rtx as the only components in the address, e.g. if the
18191 register is %rbp or %r13. As this test is much cheaper and moves or
18192 zero extensions are the common case, do this check first. */
18193 if (REG_P (operands[1])
18194 || (SImode_address_operand (operands[1], VOIDmode)
18195 && REG_P (XEXP (operands[1], 0))))
18196 return false;
18198 /* Check if it is OK to split here. */
18199 if (!ix86_ok_to_clobber_flags (insn))
18200 return false;
18202 ok = ix86_decompose_address (operands[1], &parts);
18203 gcc_assert (ok);
18205 /* There should be at least two components in the address. */
18206 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18207 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18208 return false;
18210 /* We should not split into an add if a non-legitimate PIC
18211 operand is used as the displacement. */
18212 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18213 return false;
18215 regno0 = true_regnum (operands[0]) ;
18216 regno1 = INVALID_REGNUM;
18217 regno2 = INVALID_REGNUM;
18219 if (parts.base)
18220 regno1 = true_regnum (parts.base);
18221 if (parts.index)
18222 regno2 = true_regnum (parts.index);
18224 split_cost = 0;
18226 /* Compute how many cycles we will add to execution time
18227 if we split the lea into a sequence of instructions. */
18228 if (parts.base || parts.index)
18230 /* Have to use a mov instruction if the non-destructive
18231 destination form is used. */
18232 if (regno1 != regno0 && regno2 != regno0)
18233 split_cost += 1;
18235 /* Have to add index to base if both exist. */
18236 if (parts.base && parts.index)
18237 split_cost += 1;
18239 /* Have to use shift and adds if scale is 2 or greater. */
18240 if (parts.scale > 1)
18242 if (regno0 != regno1)
18243 split_cost += 1;
18244 else if (regno2 == regno0)
18245 split_cost += 4;
18246 else
18247 split_cost += parts.scale;
18250 /* Have to use an add instruction with an immediate if
18251 disp is non-zero. */
18252 if (parts.disp && parts.disp != const0_rtx)
18253 split_cost += 1;
18255 /* Subtract the price of lea. */
18256 split_cost -= 1;
18259 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18260 parts.scale > 1);
18263 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18264 matches destination. RTX includes clobber of FLAGS_REG. */
18266 static void
18267 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18268 rtx dst, rtx src)
18270 rtx op, clob;
18272 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18273 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18275 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18278 /* Return true if regno1 def is nearest to the insn. */
18280 static bool
18281 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18283 rtx prev = insn;
18284 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18286 if (insn == start)
18287 return false;
18288 while (prev && prev != start)
18290 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18292 prev = PREV_INSN (prev);
18293 continue;
18295 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18296 return true;
18297 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18298 return false;
18299 prev = PREV_INSN (prev);
18302 /* None of the regs is defined in the bb. */
18303 return false;
18306 /* Split lea instructions into a sequence of instructions
18307 which are executed on the ALU to avoid AGU stalls.
18308 It is assumed that it is allowed to clobber the flags register
18309 at the lea position. */
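/* An illustrative split of a full-form lea (AT&T syntax, hypothetical
   registers), corresponding to the cases handled below:
     lea 0x8(%rbx,%rcx,4), %rax
   may become
     mov %rcx, %rax
     shl $2, %rax
     add %rbx, %rax
     add $0x8, %rax
   with the exact sequence depending on which operands coincide with
   the destination.  */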
18311 void
18312 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18314 unsigned int regno0, regno1, regno2;
18315 struct ix86_address parts;
18316 rtx target, tmp;
18317 int ok, adds;
18319 ok = ix86_decompose_address (operands[1], &parts);
18320 gcc_assert (ok);
18322 target = gen_lowpart (mode, operands[0]);
18324 regno0 = true_regnum (target);
18325 regno1 = INVALID_REGNUM;
18326 regno2 = INVALID_REGNUM;
18328 if (parts.base)
18330 parts.base = gen_lowpart (mode, parts.base);
18331 regno1 = true_regnum (parts.base);
18334 if (parts.index)
18336 parts.index = gen_lowpart (mode, parts.index);
18337 regno2 = true_regnum (parts.index);
18340 if (parts.disp)
18341 parts.disp = gen_lowpart (mode, parts.disp);
18343 if (parts.scale > 1)
18345 /* Case r1 = r1 + ... */
18346 if (regno1 == regno0)
18348 /* If we have a case r1 = r1 + C * r2 then we
18349 would have to use multiplication, which is very
18350 expensive. Assume the cost model is wrong if we
18351 hit such a case here. */
18352 gcc_assert (regno2 != regno0);
18354 for (adds = parts.scale; adds > 0; adds--)
18355 ix86_emit_binop (PLUS, mode, target, parts.index);
18357 else
18359 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18360 if (regno0 != regno2)
18361 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18363 /* Use shift for scaling. */
18364 ix86_emit_binop (ASHIFT, mode, target,
18365 GEN_INT (exact_log2 (parts.scale)));
18367 if (parts.base)
18368 ix86_emit_binop (PLUS, mode, target, parts.base);
18370 if (parts.disp && parts.disp != const0_rtx)
18371 ix86_emit_binop (PLUS, mode, target, parts.disp);
18374 else if (!parts.base && !parts.index)
18376 gcc_assert(parts.disp);
18377 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18379 else
18381 if (!parts.base)
18383 if (regno0 != regno2)
18384 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18386 else if (!parts.index)
18388 if (regno0 != regno1)
18389 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18391 else
18393 if (regno0 == regno1)
18394 tmp = parts.index;
18395 else if (regno0 == regno2)
18396 tmp = parts.base;
18397 else
18399 rtx tmp1;
18401 /* Find better operand for SET instruction, depending
18402 on which definition is farther from the insn. */
18403 if (find_nearest_reg_def (insn, regno1, regno2))
18404 tmp = parts.index, tmp1 = parts.base;
18405 else
18406 tmp = parts.base, tmp1 = parts.index;
18408 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18410 if (parts.disp && parts.disp != const0_rtx)
18411 ix86_emit_binop (PLUS, mode, target, parts.disp);
18413 ix86_emit_binop (PLUS, mode, target, tmp1);
18414 return;
18417 ix86_emit_binop (PLUS, mode, target, tmp);
18420 if (parts.disp && parts.disp != const0_rtx)
18421 ix86_emit_binop (PLUS, mode, target, parts.disp);
18425 /* Return true if it is ok to optimize an ADD operation to a LEA
18426 operation to avoid flag register consumption. For most processors,
18427 ADD is faster than LEA. For processors like BONNELL, if the
18428 destination register of LEA holds an actual address which will be
18429 used soon, LEA is better; otherwise ADD is better. */
18431 bool
18432 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18434 unsigned int regno0 = true_regnum (operands[0]);
18435 unsigned int regno1 = true_regnum (operands[1]);
18436 unsigned int regno2 = true_regnum (operands[2]);
18438 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18439 if (regno0 != regno1 && regno0 != regno2)
18440 return true;
18442 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18443 return false;
18445 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18448 /* Return true if destination reg of SET_BODY is shift count of
18449 USE_BODY. */
18451 static bool
18452 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18454 rtx set_dest;
18455 rtx shift_rtx;
18456 int i;
18458 /* Retrieve destination of SET_BODY. */
18459 switch (GET_CODE (set_body))
18461 case SET:
18462 set_dest = SET_DEST (set_body);
18463 if (!set_dest || !REG_P (set_dest))
18464 return false;
18465 break;
18466 case PARALLEL:
18467 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18468 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18469 use_body))
18470 return true;
18471 default:
18472 return false;
18473 break;
18476 /* Retrieve shift count of USE_BODY. */
18477 switch (GET_CODE (use_body))
18479 case SET:
18480 shift_rtx = XEXP (use_body, 1);
18481 break;
18482 case PARALLEL:
18483 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18484 if (ix86_dep_by_shift_count_body (set_body,
18485 XVECEXP (use_body, 0, i)))
18486 return true;
18487 default:
18488 return false;
18489 break;
18492 if (shift_rtx
18493 && (GET_CODE (shift_rtx) == ASHIFT
18494 || GET_CODE (shift_rtx) == LSHIFTRT
18495 || GET_CODE (shift_rtx) == ASHIFTRT
18496 || GET_CODE (shift_rtx) == ROTATE
18497 || GET_CODE (shift_rtx) == ROTATERT))
18499 rtx shift_count = XEXP (shift_rtx, 1);
18501 /* Return true if shift count is dest of SET_BODY. */
18502 if (REG_P (shift_count))
18504 /* Add this check since this function can be invoked before
18505 register allocation by the pre-reload scheduler. */
18506 if (reload_completed
18507 && true_regnum (set_dest) == true_regnum (shift_count))
18508 return true;
18509 else if (REGNO(set_dest) == REGNO(shift_count))
18510 return true;
18514 return false;
18517 /* Return true if destination reg of SET_INSN is shift count of
18518 USE_INSN. */
18520 bool
18521 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18523 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18524 PATTERN (use_insn));
18527 /* Return TRUE or FALSE depending on whether the unary operator meets the
18528 appropriate constraints. */
18530 bool
18531 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18532 enum machine_mode mode ATTRIBUTE_UNUSED,
18533 rtx operands[2])
18535 /* If one of operands is memory, source and destination must match. */
18536 if ((MEM_P (operands[0])
18537 || MEM_P (operands[1]))
18538 && ! rtx_equal_p (operands[0], operands[1]))
18539 return false;
18540 return true;
18543 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18544 are ok, keeping in mind the possible movddup alternative. */
18546 bool
18547 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18549 if (MEM_P (operands[0]))
18550 return rtx_equal_p (operands[0], operands[1 + high]);
18551 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18552 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18553 return true;
18556 /* Post-reload splitter for converting an SF or DFmode value in an
18557 SSE register into an unsigned SImode. */
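/* A sketch of the algorithm implemented below: compare the value against
   2**31; where it is >= 2**31, subtract 2**31 before the signed
   truncating conversion, then set the sign bit of the integer result
   back by XORing with the comparison mask shifted left by 31.  */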
18559 void
18560 ix86_split_convert_uns_si_sse (rtx operands[])
18562 enum machine_mode vecmode;
18563 rtx value, large, zero_or_two31, input, two31, x;
18565 large = operands[1];
18566 zero_or_two31 = operands[2];
18567 input = operands[3];
18568 two31 = operands[4];
18569 vecmode = GET_MODE (large);
18570 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18572 /* Load up the value into the low element. We must ensure that the other
18573 elements are valid floats -- zero is the easiest such value. */
18574 if (MEM_P (input))
18576 if (vecmode == V4SFmode)
18577 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18578 else
18579 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18581 else
18583 input = gen_rtx_REG (vecmode, REGNO (input));
18584 emit_move_insn (value, CONST0_RTX (vecmode));
18585 if (vecmode == V4SFmode)
18586 emit_insn (gen_sse_movss (value, value, input));
18587 else
18588 emit_insn (gen_sse2_movsd (value, value, input));
18591 emit_move_insn (large, two31);
18592 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18594 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18595 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18597 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18598 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18600 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18601 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18603 large = gen_rtx_REG (V4SImode, REGNO (large));
18604 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18606 x = gen_rtx_REG (V4SImode, REGNO (value));
18607 if (vecmode == V4SFmode)
18608 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18609 else
18610 emit_insn (gen_sse2_cvttpd2dq (x, value));
18611 value = x;
18613 emit_insn (gen_xorv4si3 (value, value, large));
18616 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18617 Expects the 64-bit DImode to be supplied in a pair of integral
18618 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18619 -mfpmath=sse, !optimize_size only. */
18621 void
18622 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18624 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18625 rtx int_xmm, fp_xmm;
18626 rtx biases, exponents;
18627 rtx x;
18629 int_xmm = gen_reg_rtx (V4SImode);
18630 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18631 emit_insn (gen_movdi_to_sse (int_xmm, input));
18632 else if (TARGET_SSE_SPLIT_REGS)
18634 emit_clobber (int_xmm);
18635 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18637 else
18639 x = gen_reg_rtx (V2DImode);
18640 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18641 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18644 x = gen_rtx_CONST_VECTOR (V4SImode,
18645 gen_rtvec (4, GEN_INT (0x43300000UL),
18646 GEN_INT (0x45300000UL),
18647 const0_rtx, const0_rtx));
18648 exponents = validize_mem (force_const_mem (V4SImode, x));
18650 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18651 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18653 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18654 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18655 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18656 (0x1.0p84 + double(fp_value_hi_xmm)).
18657 Note these exponents differ by 32. */
18659 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18661 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18662 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18663 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18664 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18665 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18666 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18667 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18668 biases = validize_mem (force_const_mem (V2DFmode, biases));
18669 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18671 /* Add the upper and lower DFmode values together. */
18672 if (TARGET_SSE3)
18673 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18674 else
18676 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18677 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18678 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18681 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18684 /* Not used, but eases macroization of patterns. */
18685 void
18686 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18687 rtx input ATTRIBUTE_UNUSED)
18689 gcc_unreachable ();
18692 /* Convert an unsigned SImode value into a DFmode. Only currently used
18693 for SSE, but applicable anywhere. */
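/* The trick: adding INT_MIN flips the sign bit, so the biased value viewed
   as a signed int equals the input minus 2**31.  Converting that and adding
   2**31.0 back recovers the unsigned value exactly, since every 32-bit
   integer is representable in DFmode.  For example, input 0xffffffff becomes
   0x7fffffff = 2147483647, which converts to 2147483647.0, and adding
   2147483648.0 gives 4294967295.0.  */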
18695 void
18696 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18698 REAL_VALUE_TYPE TWO31r;
18699 rtx x, fp;
18701 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18702 NULL, 1, OPTAB_DIRECT);
18704 fp = gen_reg_rtx (DFmode);
18705 emit_insn (gen_floatsidf2 (fp, x));
18707 real_ldexp (&TWO31r, &dconst1, 31);
18708 x = const_double_from_real_value (TWO31r, DFmode);
18710 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18711 if (x != target)
18712 emit_move_insn (target, x);
18715 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18716 32-bit mode; otherwise we have a direct convert instruction. */
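/* The value is split at bit 32: the high half is converted as a signed
   SImode and scaled by 2**32, the low half as an unsigned SImode, and the
   two DFmode parts are added.  For example, -5 = 0xfffffffffffffffb gives
   hi = -1 and lo = 4294967291, so -1.0 * 2**32 + 4294967291.0 = -5.0.  */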
18718 void
18719 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18721 REAL_VALUE_TYPE TWO32r;
18722 rtx fp_lo, fp_hi, x;
18724 fp_lo = gen_reg_rtx (DFmode);
18725 fp_hi = gen_reg_rtx (DFmode);
18727 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18729 real_ldexp (&TWO32r, &dconst1, 32);
18730 x = const_double_from_real_value (TWO32r, DFmode);
18731 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18733 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18735 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18736 0, OPTAB_DIRECT);
18737 if (x != target)
18738 emit_move_insn (target, x);
18741 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18742 For x86_32, -mfpmath=sse, !optimize_size only. */
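/* Both 16-bit halves convert to SFmode exactly and the scaling by 2**16 is
   exact, so only the final addition can round.  For example, input
   0x00ffffff gives hi = 0x00ff = 255 and lo = 0xffff = 65535;
   255.0 * 65536.0 + 65535.0 = 16777215.0 = (float) 0x00ffffff exactly.  */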
18743 void
18744 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18746 REAL_VALUE_TYPE ONE16r;
18747 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18749 real_ldexp (&ONE16r, &dconst1, 16);
18750 x = const_double_from_real_value (ONE16r, SFmode);
18751 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18752 NULL, 0, OPTAB_DIRECT);
18753 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18754 NULL, 0, OPTAB_DIRECT);
18755 fp_hi = gen_reg_rtx (SFmode);
18756 fp_lo = gen_reg_rtx (SFmode);
18757 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18758 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18759 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18760 0, OPTAB_DIRECT);
18761 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18762 0, OPTAB_DIRECT);
18763 if (!rtx_equal_p (target, fp_hi))
18764 emit_move_insn (target, fp_hi);
18767 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18768 a vector of unsigned ints VAL to vector of floats TARGET. */
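/* This is the vector analogue of ix86_expand_convert_uns_sisf_sse: each lane
   is computed as (val >> 16) * 2**16 + (val & 0xffff), where both halves fit
   in the SFmode significand and therefore convert exactly.  */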
18770 void
18771 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18773 rtx tmp[8];
18774 REAL_VALUE_TYPE TWO16r;
18775 enum machine_mode intmode = GET_MODE (val);
18776 enum machine_mode fltmode = GET_MODE (target);
18777 rtx (*cvt) (rtx, rtx);
18779 if (intmode == V4SImode)
18780 cvt = gen_floatv4siv4sf2;
18781 else
18782 cvt = gen_floatv8siv8sf2;
18783 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18784 tmp[0] = force_reg (intmode, tmp[0]);
18785 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18786 OPTAB_DIRECT);
18787 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18788 NULL_RTX, 1, OPTAB_DIRECT);
18789 tmp[3] = gen_reg_rtx (fltmode);
18790 emit_insn (cvt (tmp[3], tmp[1]));
18791 tmp[4] = gen_reg_rtx (fltmode);
18792 emit_insn (cvt (tmp[4], tmp[2]));
18793 real_ldexp (&TWO16r, &dconst1, 16);
18794 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18795 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18796 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18797 OPTAB_DIRECT);
18798 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18799 OPTAB_DIRECT);
18800 if (tmp[7] != target)
18801 emit_move_insn (target, tmp[7]);
18804 /* Adjust a V*SFmode/V*DFmode value VAL so that the *sfix_trunc* resp. fix_trunc*
18805 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18806 This is done by doing just a signed conversion if the value is < 0x1p31, and
18807 otherwise by subtracting 0x1p31 first and XORing in 0x80000000 from *XORP afterwards. */
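/* The comparison below produces an all-ones lane mask where VAL >= 0x1p31.
   ANDing it with a vector of 0x1p31 gives the per-lane amount to subtract,
   and reinterpreting the mask as integers and shifting (or masking) yields
   the per-lane 0x80000000 value returned through *XORP, to be XORed into the
   truncated result afterwards.  */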
18810 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18812 REAL_VALUE_TYPE TWO31r;
18813 rtx two31r, tmp[4];
18814 enum machine_mode mode = GET_MODE (val);
18815 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18816 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18817 rtx (*cmp) (rtx, rtx, rtx, rtx);
18818 int i;
18820 for (i = 0; i < 3; i++)
18821 tmp[i] = gen_reg_rtx (mode);
18822 real_ldexp (&TWO31r, &dconst1, 31);
18823 two31r = const_double_from_real_value (TWO31r, scalarmode);
18824 two31r = ix86_build_const_vector (mode, 1, two31r);
18825 two31r = force_reg (mode, two31r);
18826 switch (mode)
18828 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18829 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18830 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18831 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18832 default: gcc_unreachable ();
18834 tmp[3] = gen_rtx_LE (mode, two31r, val);
18835 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18836 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18837 0, OPTAB_DIRECT);
18838 if (intmode == V4SImode || TARGET_AVX2)
18839 *xorp = expand_simple_binop (intmode, ASHIFT,
18840 gen_lowpart (intmode, tmp[0]),
18841 GEN_INT (31), NULL_RTX, 0,
18842 OPTAB_DIRECT);
18843 else
18845 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18846 two31 = ix86_build_const_vector (intmode, 1, two31);
18847 *xorp = expand_simple_binop (intmode, AND,
18848 gen_lowpart (intmode, tmp[0]),
18849 two31, NULL_RTX, 0,
18850 OPTAB_DIRECT);
18852 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18853 0, OPTAB_DIRECT);
18856 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18857 then replicate the value for all elements of the vector
18858 register. */
18861 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18863 int i, n_elt;
18864 rtvec v;
18865 enum machine_mode scalar_mode;
18867 switch (mode)
18869 case V64QImode:
18870 case V32QImode:
18871 case V16QImode:
18872 case V32HImode:
18873 case V16HImode:
18874 case V8HImode:
18875 case V16SImode:
18876 case V8SImode:
18877 case V4SImode:
18878 case V8DImode:
18879 case V4DImode:
18880 case V2DImode:
18881 gcc_assert (vect);
18882 case V16SFmode:
18883 case V8SFmode:
18884 case V4SFmode:
18885 case V8DFmode:
18886 case V4DFmode:
18887 case V2DFmode:
18888 n_elt = GET_MODE_NUNITS (mode);
18889 v = rtvec_alloc (n_elt);
18890 scalar_mode = GET_MODE_INNER (mode);
18892 RTVEC_ELT (v, 0) = value;
18894 for (i = 1; i < n_elt; ++i)
18895 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18897 return gen_rtx_CONST_VECTOR (mode, v);
18899 default:
18900 gcc_unreachable ();
18904 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18905 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18906 for an SSE register. If VECT is true, then replicate the mask for
18907 all elements of the vector register. If INVERT is true, then create
18908 a mask excluding the sign bit. */
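/* For example, for V4SFmode each element of the mask is 0x80000000
   (0x7fffffff when INVERT is true), and for V2DFmode each element is
   0x8000000000000000 (0x7fffffffffffffff when INVERT is true).  */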
18911 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18913 enum machine_mode vec_mode, imode;
18914 HOST_WIDE_INT hi, lo;
18915 int shift = 63;
18916 rtx v;
18917 rtx mask;
18919 /* Find the sign bit, sign extended to 2*HWI. */
18920 switch (mode)
18922 case V16SImode:
18923 case V16SFmode:
18924 case V8SImode:
18925 case V4SImode:
18926 case V8SFmode:
18927 case V4SFmode:
18928 vec_mode = mode;
18929 mode = GET_MODE_INNER (mode);
18930 imode = SImode;
18931 lo = 0x80000000, hi = lo < 0;
18932 break;
18934 case V8DImode:
18935 case V4DImode:
18936 case V2DImode:
18937 case V8DFmode:
18938 case V4DFmode:
18939 case V2DFmode:
18940 vec_mode = mode;
18941 mode = GET_MODE_INNER (mode);
18942 imode = DImode;
18943 if (HOST_BITS_PER_WIDE_INT >= 64)
18944 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18945 else
18946 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18947 break;
18949 case TImode:
18950 case TFmode:
18951 vec_mode = VOIDmode;
18952 if (HOST_BITS_PER_WIDE_INT >= 64)
18954 imode = TImode;
18955 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18957 else
18959 rtvec vec;
18961 imode = DImode;
18962 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18964 if (invert)
18966 lo = ~lo, hi = ~hi;
18967 v = constm1_rtx;
18969 else
18970 v = const0_rtx;
18972 mask = immed_double_const (lo, hi, imode);
18974 vec = gen_rtvec (2, v, mask);
18975 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
18976 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
18978 return v;
18980 break;
18982 default:
18983 gcc_unreachable ();
18986 if (invert)
18987 lo = ~lo, hi = ~hi;
18989 /* Force this value into the low part of a fp vector constant. */
18990 mask = immed_double_const (lo, hi, imode);
18991 mask = gen_lowpart (mode, mask);
18993 if (vec_mode == VOIDmode)
18994 return force_reg (mode, mask);
18996 v = ix86_build_const_vector (vec_mode, vect, mask);
18997 return force_reg (vec_mode, v);
19000 /* Generate code for floating point ABS or NEG. */
19002 void
19003 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19004 rtx operands[])
19006 rtx mask, set, dst, src;
19007 bool use_sse = false;
19008 bool vector_mode = VECTOR_MODE_P (mode);
19009 enum machine_mode vmode = mode;
19011 if (vector_mode)
19012 use_sse = true;
19013 else if (mode == TFmode)
19014 use_sse = true;
19015 else if (TARGET_SSE_MATH)
19017 use_sse = SSE_FLOAT_MODE_P (mode);
19018 if (mode == SFmode)
19019 vmode = V4SFmode;
19020 else if (mode == DFmode)
19021 vmode = V2DFmode;
19024 /* NEG and ABS performed with SSE use bitwise mask operations.
19025 Create the appropriate mask now. */
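  /* With such a mask, NEG becomes an XOR with the sign-bit mask and ABS
     becomes an AND with the inverted mask; e.g. for DFmode bit patterns,
     -x = x ^ 0x8000000000000000 and fabs (x) = x & 0x7fffffffffffffff.  */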
19026 if (use_sse)
19027 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19028 else
19029 mask = NULL_RTX;
19031 dst = operands[0];
19032 src = operands[1];
19034 set = gen_rtx_fmt_e (code, mode, src);
19035 set = gen_rtx_SET (VOIDmode, dst, set);
19037 if (mask)
19039 rtx use, clob;
19040 rtvec par;
19042 use = gen_rtx_USE (VOIDmode, mask);
19043 if (vector_mode)
19044 par = gen_rtvec (2, set, use);
19045 else
19047 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19048 par = gen_rtvec (3, set, use, clob);
19050 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19052 else
19053 emit_insn (set);
19056 /* Expand a copysign operation. Special case operand 0 being a constant. */
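/* Bitwise, copysign (x, y) = (x & ~signbit) | (y & signbit); when the
   magnitude operand is a constant its absolute value can be folded in
   advance, leaving a single AND and OR at run time.  */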
19058 void
19059 ix86_expand_copysign (rtx operands[])
19061 enum machine_mode mode, vmode;
19062 rtx dest, op0, op1, mask, nmask;
19064 dest = operands[0];
19065 op0 = operands[1];
19066 op1 = operands[2];
19068 mode = GET_MODE (dest);
19070 if (mode == SFmode)
19071 vmode = V4SFmode;
19072 else if (mode == DFmode)
19073 vmode = V2DFmode;
19074 else
19075 vmode = mode;
19077 if (GET_CODE (op0) == CONST_DOUBLE)
19079 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19081 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19082 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19084 if (mode == SFmode || mode == DFmode)
19086 if (op0 == CONST0_RTX (mode))
19087 op0 = CONST0_RTX (vmode);
19088 else
19090 rtx v = ix86_build_const_vector (vmode, false, op0);
19092 op0 = force_reg (vmode, v);
19095 else if (op0 != CONST0_RTX (mode))
19096 op0 = force_reg (mode, op0);
19098 mask = ix86_build_signbit_mask (vmode, 0, 0);
19100 if (mode == SFmode)
19101 copysign_insn = gen_copysignsf3_const;
19102 else if (mode == DFmode)
19103 copysign_insn = gen_copysigndf3_const;
19104 else
19105 copysign_insn = gen_copysigntf3_const;
19107 emit_insn (copysign_insn (dest, op0, op1, mask));
19109 else
19111 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19113 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19114 mask = ix86_build_signbit_mask (vmode, 0, 0);
19116 if (mode == SFmode)
19117 copysign_insn = gen_copysignsf3_var;
19118 else if (mode == DFmode)
19119 copysign_insn = gen_copysigndf3_var;
19120 else
19121 copysign_insn = gen_copysigntf3_var;
19123 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19127 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19128 be a constant, and so has already been expanded into a vector constant. */
19130 void
19131 ix86_split_copysign_const (rtx operands[])
19133 enum machine_mode mode, vmode;
19134 rtx dest, op0, mask, x;
19136 dest = operands[0];
19137 op0 = operands[1];
19138 mask = operands[3];
19140 mode = GET_MODE (dest);
19141 vmode = GET_MODE (mask);
19143 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19144 x = gen_rtx_AND (vmode, dest, mask);
19145 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19147 if (op0 != CONST0_RTX (vmode))
19149 x = gen_rtx_IOR (vmode, dest, op0);
19150 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19154 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19155 so we have to do two masks. */
19157 void
19158 ix86_split_copysign_var (rtx operands[])
19160 enum machine_mode mode, vmode;
19161 rtx dest, scratch, op0, op1, mask, nmask, x;
19163 dest = operands[0];
19164 scratch = operands[1];
19165 op0 = operands[2];
19166 op1 = operands[3];
19167 nmask = operands[4];
19168 mask = operands[5];
19170 mode = GET_MODE (dest);
19171 vmode = GET_MODE (mask);
19173 if (rtx_equal_p (op0, op1))
19175 /* Shouldn't happen often (it's useless, obviously), but when it does
19176 we'd generate incorrect code if we continue below. */
19177 emit_move_insn (dest, op0);
19178 return;
19181 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19183 gcc_assert (REGNO (op1) == REGNO (scratch));
19185 x = gen_rtx_AND (vmode, scratch, mask);
19186 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19188 dest = mask;
19189 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19190 x = gen_rtx_NOT (vmode, dest);
19191 x = gen_rtx_AND (vmode, x, op0);
19192 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19194 else
19196 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19198 x = gen_rtx_AND (vmode, scratch, mask);
19200 else /* alternative 2,4 */
19202 gcc_assert (REGNO (mask) == REGNO (scratch));
19203 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19204 x = gen_rtx_AND (vmode, scratch, op1);
19206 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19208 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19210 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19211 x = gen_rtx_AND (vmode, dest, nmask);
19213 else /* alternative 3,4 */
19215 gcc_assert (REGNO (nmask) == REGNO (dest));
19216 dest = nmask;
19217 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19218 x = gen_rtx_AND (vmode, dest, op0);
19220 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19223 x = gen_rtx_IOR (vmode, dest, scratch);
19224 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19227 /* Return TRUE or FALSE depending on whether the first SET in INSN
19228 has source and destination with matching CC modes and the CC mode
19229 is at least as constrained as REQ_MODE. */
19231 bool
19232 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19234 rtx set;
19235 enum machine_mode set_mode;
19237 set = PATTERN (insn);
19238 if (GET_CODE (set) == PARALLEL)
19239 set = XVECEXP (set, 0, 0);
19240 gcc_assert (GET_CODE (set) == SET);
19241 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19243 set_mode = GET_MODE (SET_DEST (set));
19244 switch (set_mode)
19246 case CCNOmode:
19247 if (req_mode != CCNOmode
19248 && (req_mode != CCmode
19249 || XEXP (SET_SRC (set), 1) != const0_rtx))
19250 return false;
19251 break;
19252 case CCmode:
19253 if (req_mode == CCGCmode)
19254 return false;
19255 /* FALLTHRU */
19256 case CCGCmode:
19257 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19258 return false;
19259 /* FALLTHRU */
19260 case CCGOCmode:
19261 if (req_mode == CCZmode)
19262 return false;
19263 /* FALLTHRU */
19264 case CCZmode:
19265 break;
19267 case CCAmode:
19268 case CCCmode:
19269 case CCOmode:
19270 case CCSmode:
19271 if (set_mode != req_mode)
19272 return false;
19273 break;
19275 default:
19276 gcc_unreachable ();
19279 return GET_MODE (SET_SRC (set)) == set_mode;
19282 /* Generate insn patterns to do an integer compare of OPERANDS. */
19284 static rtx
19285 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19287 enum machine_mode cmpmode;
19288 rtx tmp, flags;
19290 cmpmode = SELECT_CC_MODE (code, op0, op1);
19291 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19293 /* This is very simple, but making the interface the same as in the
19294 FP case makes the rest of the code easier. */
19295 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19296 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19298 /* Return the test that should be put into the flags user, i.e.
19299 the bcc, scc, or cmov instruction. */
19300 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19303 /* Figure out whether to use ordered or unordered fp comparisons.
19304 Return the appropriate mode to use. */
19306 enum machine_mode
19307 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19309 /* ??? In order to make all comparisons reversible, we do all comparisons
19310 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19311 all forms of trapping and nontrapping comparisons, we can make inequality
19312 comparisons trapping again, since it results in better code when using
19313 FCOM based compares. */
19314 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19317 enum machine_mode
19318 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19320 enum machine_mode mode = GET_MODE (op0);
19322 if (SCALAR_FLOAT_MODE_P (mode))
19324 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19325 return ix86_fp_compare_mode (code);
19328 switch (code)
19330 /* Only zero flag is needed. */
19331 case EQ: /* ZF=0 */
19332 case NE: /* ZF!=0 */
19333 return CCZmode;
19334 /* Codes needing carry flag. */
19335 case GEU: /* CF=0 */
19336 case LTU: /* CF=1 */
19337 /* Detect overflow checks. They need just the carry flag. */
19338 if (GET_CODE (op0) == PLUS
19339 && rtx_equal_p (op1, XEXP (op0, 0)))
19340 return CCCmode;
19341 else
19342 return CCmode;
19343 case GTU: /* CF=0 & ZF=0 */
19344 case LEU: /* CF=1 | ZF=1 */
19345 return CCmode;
19346 /* Codes possibly doable only with sign flag when
19347 comparing against zero. */
19348 case GE: /* SF=OF or SF=0 */
19349 case LT: /* SF<>OF or SF=1 */
19350 if (op1 == const0_rtx)
19351 return CCGOCmode;
19352 else
19353 /* For other cases Carry flag is not required. */
19354 return CCGCmode;
19355 /* Codes doable only with sign flag when comparing
19356 against zero, but we miss jump instruction for it
19357 so we need to use relational tests against overflow
19358 that thus needs to be zero. */
19359 case GT: /* ZF=0 & SF=OF */
19360 case LE: /* ZF=1 | SF<>OF */
19361 if (op1 == const0_rtx)
19362 return CCNOmode;
19363 else
19364 return CCGCmode;
19365 /* The strcmp pattern does (use flags), and combine may ask us for the
19366 proper mode. */
19367 case USE:
19368 return CCmode;
19369 default:
19370 gcc_unreachable ();
19374 /* Return the fixed registers used for condition codes. */
19376 static bool
19377 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19379 *p1 = FLAGS_REG;
19380 *p2 = FPSR_REG;
19381 return true;
19384 /* If two condition code modes are compatible, return a condition code
19385 mode which is compatible with both. Otherwise, return
19386 VOIDmode. */
19388 static enum machine_mode
19389 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19391 if (m1 == m2)
19392 return m1;
19394 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19395 return VOIDmode;
19397 if ((m1 == CCGCmode && m2 == CCGOCmode)
19398 || (m1 == CCGOCmode && m2 == CCGCmode))
19399 return CCGCmode;
19401 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19402 return m2;
19403 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19404 return m1;
19406 switch (m1)
19408 default:
19409 gcc_unreachable ();
19411 case CCmode:
19412 case CCGCmode:
19413 case CCGOCmode:
19414 case CCNOmode:
19415 case CCAmode:
19416 case CCCmode:
19417 case CCOmode:
19418 case CCSmode:
19419 case CCZmode:
19420 switch (m2)
19422 default:
19423 return VOIDmode;
19425 case CCmode:
19426 case CCGCmode:
19427 case CCGOCmode:
19428 case CCNOmode:
19429 case CCAmode:
19430 case CCCmode:
19431 case CCOmode:
19432 case CCSmode:
19433 case CCZmode:
19434 return CCmode;
19437 case CCFPmode:
19438 case CCFPUmode:
19439 /* These are only compatible with themselves, which we already
19440 checked above. */
19441 return VOIDmode;
19446 /* Return a comparison we can do that is equivalent to
19447 swap_condition (code), apart possibly from orderedness.
19448 But never change orderedness if TARGET_IEEE_FP, returning
19449 UNKNOWN in that case if necessary. */
19451 static enum rtx_code
19452 ix86_fp_swap_condition (enum rtx_code code)
19454 switch (code)
19456 case GT: /* GTU - CF=0 & ZF=0 */
19457 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19458 case GE: /* GEU - CF=0 */
19459 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19460 case UNLT: /* LTU - CF=1 */
19461 return TARGET_IEEE_FP ? UNKNOWN : GT;
19462 case UNLE: /* LEU - CF=1 | ZF=1 */
19463 return TARGET_IEEE_FP ? UNKNOWN : GE;
19464 default:
19465 return swap_condition (code);
19469 /* Return the cost of comparison CODE using the best strategy for performance.
19470 All following functions use the number of instructions as a cost metric.
19471 In the future this should be tweaked to compute bytes for optimize_size and
19472 take into account the performance of various instructions on various CPUs. */
19474 static int
19475 ix86_fp_comparison_cost (enum rtx_code code)
19477 int arith_cost;
19479 /* The cost of code using bit-twiddling on %ah. */
19480 switch (code)
19482 case UNLE:
19483 case UNLT:
19484 case LTGT:
19485 case GT:
19486 case GE:
19487 case UNORDERED:
19488 case ORDERED:
19489 case UNEQ:
19490 arith_cost = 4;
19491 break;
19492 case LT:
19493 case NE:
19494 case EQ:
19495 case UNGE:
19496 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19497 break;
19498 case LE:
19499 case UNGT:
19500 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19501 break;
19502 default:
19503 gcc_unreachable ();
19506 switch (ix86_fp_comparison_strategy (code))
19508 case IX86_FPCMP_COMI:
19509 return arith_cost > 4 ? 3 : 2;
19510 case IX86_FPCMP_SAHF:
19511 return arith_cost > 4 ? 4 : 3;
19512 default:
19513 return arith_cost;
19517 /* Return the strategy to use for floating-point compares. We assume that
19518 fcomi is always preferable where available, since that is also true when
19519 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19521 enum ix86_fpcmp_strategy
19522 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19524 /* Do fcomi/sahf based test when profitable. */
19526 if (TARGET_CMOVE)
19527 return IX86_FPCMP_COMI;
19529 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19530 return IX86_FPCMP_SAHF;
19532 return IX86_FPCMP_ARITH;
19535 /* Swap, force into registers, or otherwise massage the two operands
19536 to a fp comparison. The operands are updated in place; the new
19537 comparison code is returned. */
19539 static enum rtx_code
19540 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19542 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19543 rtx op0 = *pop0, op1 = *pop1;
19544 enum machine_mode op_mode = GET_MODE (op0);
19545 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19547 /* All of the unordered compare instructions only work on registers.
19548 The same is true of the fcomi compare instructions. The XFmode
19549 compare instructions require registers except when comparing
19550 against zero or when converting operand 1 from fixed point to
19551 floating point. */
19553 if (!is_sse
19554 && (fpcmp_mode == CCFPUmode
19555 || (op_mode == XFmode
19556 && ! (standard_80387_constant_p (op0) == 1
19557 || standard_80387_constant_p (op1) == 1)
19558 && GET_CODE (op1) != FLOAT)
19559 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19561 op0 = force_reg (op_mode, op0);
19562 op1 = force_reg (op_mode, op1);
19564 else
19566 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19567 things around if they appear profitable, otherwise force op0
19568 into a register. */
19570 if (standard_80387_constant_p (op0) == 0
19571 || (MEM_P (op0)
19572 && ! (standard_80387_constant_p (op1) == 0
19573 || MEM_P (op1))))
19575 enum rtx_code new_code = ix86_fp_swap_condition (code);
19576 if (new_code != UNKNOWN)
19578 rtx tmp;
19579 tmp = op0, op0 = op1, op1 = tmp;
19580 code = new_code;
19584 if (!REG_P (op0))
19585 op0 = force_reg (op_mode, op0);
19587 if (CONSTANT_P (op1))
19589 int tmp = standard_80387_constant_p (op1);
19590 if (tmp == 0)
19591 op1 = validize_mem (force_const_mem (op_mode, op1));
19592 else if (tmp == 1)
19594 if (TARGET_CMOVE)
19595 op1 = force_reg (op_mode, op1);
19597 else
19598 op1 = force_reg (op_mode, op1);
19602 /* Try to rearrange the comparison to make it cheaper. */
19603 if (ix86_fp_comparison_cost (code)
19604 > ix86_fp_comparison_cost (swap_condition (code))
19605 && (REG_P (op1) || can_create_pseudo_p ()))
19607 rtx tmp;
19608 tmp = op0, op0 = op1, op1 = tmp;
19609 code = swap_condition (code);
19610 if (!REG_P (op0))
19611 op0 = force_reg (op_mode, op0);
19614 *pop0 = op0;
19615 *pop1 = op1;
19616 return code;
19619 /* Convert the comparison codes we use to represent FP comparisons to the
19620 integer code that will result in a proper branch. Return UNKNOWN if no
19621 such code is available. */
19623 enum rtx_code
19624 ix86_fp_compare_code_to_integer (enum rtx_code code)
19626 switch (code)
19628 case GT:
19629 return GTU;
19630 case GE:
19631 return GEU;
19632 case ORDERED:
19633 case UNORDERED:
19634 return code;
19635 break;
19636 case UNEQ:
19637 return EQ;
19638 break;
19639 case UNLT:
19640 return LTU;
19641 break;
19642 case UNLE:
19643 return LEU;
19644 break;
19645 case LTGT:
19646 return NE;
19647 break;
19648 default:
19649 return UNKNOWN;
19653 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19655 static rtx
19656 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19658 enum machine_mode fpcmp_mode, intcmp_mode;
19659 rtx tmp, tmp2;
19661 fpcmp_mode = ix86_fp_compare_mode (code);
19662 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19664 /* Do fcomi/sahf based test when profitable. */
19665 switch (ix86_fp_comparison_strategy (code))
19667 case IX86_FPCMP_COMI:
19668 intcmp_mode = fpcmp_mode;
19669 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19670 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19671 tmp);
19672 emit_insn (tmp);
19673 break;
19675 case IX86_FPCMP_SAHF:
19676 intcmp_mode = fpcmp_mode;
19677 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19678 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19679 tmp);
19681 if (!scratch)
19682 scratch = gen_reg_rtx (HImode);
19683 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19684 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19685 break;
19687 case IX86_FPCMP_ARITH:
19688 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19689 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19690 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19691 if (!scratch)
19692 scratch = gen_reg_rtx (HImode);
19693 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19695 /* In the unordered case, we have to check C2 for NaN's, which
19696 doesn't happen to work out to anything nice combination-wise.
19697 So do some bit twiddling on the value we've got in AH to come
19698 up with an appropriate set of condition codes. */
19700 intcmp_mode = CCNOmode;
19701 switch (code)
19703 case GT:
19704 case UNGT:
19705 if (code == GT || !TARGET_IEEE_FP)
19707 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19708 code = EQ;
19710 else
19712 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19713 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19714 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19715 intcmp_mode = CCmode;
19716 code = GEU;
19718 break;
19719 case LT:
19720 case UNLT:
19721 if (code == LT && TARGET_IEEE_FP)
19723 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19724 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19725 intcmp_mode = CCmode;
19726 code = EQ;
19728 else
19730 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19731 code = NE;
19733 break;
19734 case GE:
19735 case UNGE:
19736 if (code == GE || !TARGET_IEEE_FP)
19738 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19739 code = EQ;
19741 else
19743 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19744 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19745 code = NE;
19747 break;
19748 case LE:
19749 case UNLE:
19750 if (code == LE && TARGET_IEEE_FP)
19752 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19753 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19754 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19755 intcmp_mode = CCmode;
19756 code = LTU;
19758 else
19760 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19761 code = NE;
19763 break;
19764 case EQ:
19765 case UNEQ:
19766 if (code == EQ && TARGET_IEEE_FP)
19768 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19769 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19770 intcmp_mode = CCmode;
19771 code = EQ;
19773 else
19775 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19776 code = NE;
19778 break;
19779 case NE:
19780 case LTGT:
19781 if (code == NE && TARGET_IEEE_FP)
19783 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19784 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19785 GEN_INT (0x40)));
19786 code = NE;
19788 else
19790 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19791 code = EQ;
19793 break;
19795 case UNORDERED:
19796 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19797 code = NE;
19798 break;
19799 case ORDERED:
19800 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19801 code = EQ;
19802 break;
19804 default:
19805 gcc_unreachable ();
19807 break;
19809 default:
19810 gcc_unreachable();
19813 /* Return the test that should be put into the flags user, i.e.
19814 the bcc, scc, or cmov instruction. */
19815 return gen_rtx_fmt_ee (code, VOIDmode,
19816 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19817 const0_rtx);
19820 static rtx
19821 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19823 rtx ret;
19825 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19826 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19828 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19830 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19831 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19833 else
19834 ret = ix86_expand_int_compare (code, op0, op1);
19836 return ret;
19839 void
19840 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19842 enum machine_mode mode = GET_MODE (op0);
19843 rtx tmp;
19845 switch (mode)
19847 case SFmode:
19848 case DFmode:
19849 case XFmode:
19850 case QImode:
19851 case HImode:
19852 case SImode:
19853 simple:
19854 tmp = ix86_expand_compare (code, op0, op1);
19855 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19856 gen_rtx_LABEL_REF (VOIDmode, label),
19857 pc_rtx);
19858 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19859 return;
19861 case DImode:
19862 if (TARGET_64BIT)
19863 goto simple;
19864 case TImode:
19865 /* Expand DImode branch into multiple compare+branch. */
19867 rtx lo[2], hi[2], label2;
19868 enum rtx_code code1, code2, code3;
19869 enum machine_mode submode;
19871 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19873 tmp = op0, op0 = op1, op1 = tmp;
19874 code = swap_condition (code);
19877 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19878 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19880 submode = mode == DImode ? SImode : DImode;
19882 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19883 avoid two branches. This costs one extra insn, so disable when
19884 optimizing for size. */
19886 if ((code == EQ || code == NE)
19887 && (!optimize_insn_for_size_p ()
19888 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19890 rtx xor0, xor1;
19892 xor1 = hi[0];
19893 if (hi[1] != const0_rtx)
19894 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19895 NULL_RTX, 0, OPTAB_WIDEN);
19897 xor0 = lo[0];
19898 if (lo[1] != const0_rtx)
19899 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19900 NULL_RTX, 0, OPTAB_WIDEN);
19902 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19903 NULL_RTX, 0, OPTAB_WIDEN);
19905 ix86_expand_branch (code, tmp, const0_rtx, label);
19906 return;
19909 /* Otherwise, if we are doing a less-than or greater-or-equal-than
19910 comparison, op1 is a constant and its low word is zero, then we can
19911 just examine the high word. Similarly for a low word of -1 and
19912 less-or-equal-than or greater-than. */
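      /* For example, with 32-bit DImode operands, a <u 0x500000000 holds
	 iff hi(a) <u 5 since the constant's low word is zero, and
	 a <=u 0x4ffffffff holds iff hi(a) <=u 4 since its low word is
	 all ones.  */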
19914 if (CONST_INT_P (hi[1]))
19915 switch (code)
19917 case LT: case LTU: case GE: case GEU:
19918 if (lo[1] == const0_rtx)
19920 ix86_expand_branch (code, hi[0], hi[1], label);
19921 return;
19923 break;
19924 case LE: case LEU: case GT: case GTU:
19925 if (lo[1] == constm1_rtx)
19927 ix86_expand_branch (code, hi[0], hi[1], label);
19928 return;
19930 break;
19931 default:
19932 break;
19935 /* Otherwise, we need two or three jumps. */
19937 label2 = gen_label_rtx ();
19939 code1 = code;
19940 code2 = swap_condition (code);
19941 code3 = unsigned_condition (code);
19943 switch (code)
19945 case LT: case GT: case LTU: case GTU:
19946 break;
19948 case LE: code1 = LT; code2 = GT; break;
19949 case GE: code1 = GT; code2 = LT; break;
19950 case LEU: code1 = LTU; code2 = GTU; break;
19951 case GEU: code1 = GTU; code2 = LTU; break;
19953 case EQ: code1 = UNKNOWN; code2 = NE; break;
19954 case NE: code2 = UNKNOWN; break;
19956 default:
19957 gcc_unreachable ();
19961 * a < b =>
19962 * if (hi(a) < hi(b)) goto true;
19963 * if (hi(a) > hi(b)) goto false;
19964 * if (lo(a) < lo(b)) goto true;
19965 * false:
19968 if (code1 != UNKNOWN)
19969 ix86_expand_branch (code1, hi[0], hi[1], label);
19970 if (code2 != UNKNOWN)
19971 ix86_expand_branch (code2, hi[0], hi[1], label2);
19973 ix86_expand_branch (code3, lo[0], lo[1], label);
19975 if (code2 != UNKNOWN)
19976 emit_label (label2);
19977 return;
19980 default:
19981 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
19982 goto simple;
19986 /* Split branch based on floating point condition. */
19987 void
19988 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
19989 rtx target1, rtx target2, rtx tmp, rtx pushed)
19991 rtx condition;
19992 rtx i;
19994 if (target2 != pc_rtx)
19996 rtx tmp = target2;
19997 code = reverse_condition_maybe_unordered (code);
19998 target2 = target1;
19999 target1 = tmp;
20002 condition = ix86_expand_fp_compare (code, op1, op2,
20003 tmp);
20005 /* Remove pushed operand from stack. */
20006 if (pushed)
20007 ix86_free_from_memory (GET_MODE (pushed));
20009 i = emit_jump_insn (gen_rtx_SET
20010 (VOIDmode, pc_rtx,
20011 gen_rtx_IF_THEN_ELSE (VOIDmode,
20012 condition, target1, target2)));
20013 if (split_branch_probability >= 0)
20014 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20017 void
20018 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20020 rtx ret;
20022 gcc_assert (GET_MODE (dest) == QImode);
20024 ret = ix86_expand_compare (code, op0, op1);
20025 PUT_MODE (ret, QImode);
20026 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20029 /* Expand a comparison setting or clearing the carry flag. Return true when
20030 successful and set *POP to the resulting comparison for the flags user. */
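/* LTU and GEU are the only comparisons that depend solely on the carry flag,
   which lets the result be materialized without a setcc, e.g. with
   "sbb reg,reg" computing reg = -CF; ix86_expand_int_movcc relies on this
   below.  */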
20031 static bool
20032 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20034 enum machine_mode mode =
20035 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20037 /* Do not handle double-mode compares that go through the special path. */
20038 if (mode == (TARGET_64BIT ? TImode : DImode))
20039 return false;
20041 if (SCALAR_FLOAT_MODE_P (mode))
20043 rtx compare_op, compare_seq;
20045 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20047 /* Shortcut: following common codes never translate
20048 into carry flag compares. */
20049 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20050 || code == ORDERED || code == UNORDERED)
20051 return false;
20053 /* These comparisons require the zero flag; swap the operands so they won't. */
20054 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20055 && !TARGET_IEEE_FP)
20057 rtx tmp = op0;
20058 op0 = op1;
20059 op1 = tmp;
20060 code = swap_condition (code);
20063 /* Try to expand the comparison and verify that we end up with
20064 a carry-flag-based comparison. This fails to be true only when
20065 we decide to expand the comparison using arithmetic, which is
20066 not a common scenario. */
20067 start_sequence ();
20068 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20069 compare_seq = get_insns ();
20070 end_sequence ();
20072 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20073 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20074 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20075 else
20076 code = GET_CODE (compare_op);
20078 if (code != LTU && code != GEU)
20079 return false;
20081 emit_insn (compare_seq);
20082 *pop = compare_op;
20083 return true;
20086 if (!INTEGRAL_MODE_P (mode))
20087 return false;
20089 switch (code)
20091 case LTU:
20092 case GEU:
20093 break;
20095 /* Convert a==0 into (unsigned)a<1. */
20096 case EQ:
20097 case NE:
20098 if (op1 != const0_rtx)
20099 return false;
20100 op1 = const1_rtx;
20101 code = (code == EQ ? LTU : GEU);
20102 break;
20104 /* Convert a>b into b<a or a>=b-1. */
20105 case GTU:
20106 case LEU:
20107 if (CONST_INT_P (op1))
20109 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20110 /* Bail out on overflow. We still can swap operands but that
20111 would force loading of the constant into register. */
20112 if (op1 == const0_rtx
20113 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20114 return false;
20115 code = (code == GTU ? GEU : LTU);
20117 else
20119 rtx tmp = op1;
20120 op1 = op0;
20121 op0 = tmp;
20122 code = (code == GTU ? LTU : GEU);
20124 break;
20126 /* Convert a>=0 into (unsigned)a<0x80000000. */
20127 case LT:
20128 case GE:
20129 if (mode == DImode || op1 != const0_rtx)
20130 return false;
20131 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20132 code = (code == LT ? GEU : LTU);
20133 break;
20134 case LE:
20135 case GT:
20136 if (mode == DImode || op1 != constm1_rtx)
20137 return false;
20138 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20139 code = (code == LE ? GEU : LTU);
20140 break;
20142 default:
20143 return false;
20145 /* Swapping operands may cause a constant to appear as the first operand. */
20146 if (!nonimmediate_operand (op0, VOIDmode))
20148 if (!can_create_pseudo_p ())
20149 return false;
20150 op0 = force_reg (mode, op0);
20152 *pop = ix86_expand_compare (code, op0, op1);
20153 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20154 return true;
20157 bool
20158 ix86_expand_int_movcc (rtx operands[])
20160 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20161 rtx compare_seq, compare_op;
20162 enum machine_mode mode = GET_MODE (operands[0]);
20163 bool sign_bit_compare_p = false;
20164 rtx op0 = XEXP (operands[1], 0);
20165 rtx op1 = XEXP (operands[1], 1);
20167 if (GET_MODE (op0) == TImode
20168 || (GET_MODE (op0) == DImode
20169 && !TARGET_64BIT))
20170 return false;
20172 start_sequence ();
20173 compare_op = ix86_expand_compare (code, op0, op1);
20174 compare_seq = get_insns ();
20175 end_sequence ();
20177 compare_code = GET_CODE (compare_op);
20179 if ((op1 == const0_rtx && (code == GE || code == LT))
20180 || (op1 == constm1_rtx && (code == GT || code == LE)))
20181 sign_bit_compare_p = true;
20183 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20184 HImode insns, we'd be swallowed in word prefix ops. */
20186 if ((mode != HImode || TARGET_FAST_PREFIX)
20187 && (mode != (TARGET_64BIT ? TImode : DImode))
20188 && CONST_INT_P (operands[2])
20189 && CONST_INT_P (operands[3]))
20191 rtx out = operands[0];
20192 HOST_WIDE_INT ct = INTVAL (operands[2]);
20193 HOST_WIDE_INT cf = INTVAL (operands[3]);
20194 HOST_WIDE_INT diff;
20196 diff = ct - cf;
20197 /* Sign bit compares are better done using shifts than by using
20198 sbb. */
20199 if (sign_bit_compare_p
20200 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20202 /* Detect overlap between destination and compare sources. */
20203 rtx tmp = out;
20205 if (!sign_bit_compare_p)
20207 rtx flags;
20208 bool fpcmp = false;
20210 compare_code = GET_CODE (compare_op);
20212 flags = XEXP (compare_op, 0);
20214 if (GET_MODE (flags) == CCFPmode
20215 || GET_MODE (flags) == CCFPUmode)
20217 fpcmp = true;
20218 compare_code
20219 = ix86_fp_compare_code_to_integer (compare_code);
20222 /* To simplify the rest of the code, restrict to the GEU case. */
20223 if (compare_code == LTU)
20225 HOST_WIDE_INT tmp = ct;
20226 ct = cf;
20227 cf = tmp;
20228 compare_code = reverse_condition (compare_code);
20229 code = reverse_condition (code);
20231 else
20233 if (fpcmp)
20234 PUT_CODE (compare_op,
20235 reverse_condition_maybe_unordered
20236 (GET_CODE (compare_op)));
20237 else
20238 PUT_CODE (compare_op,
20239 reverse_condition (GET_CODE (compare_op)));
20241 diff = ct - cf;
20243 if (reg_overlap_mentioned_p (out, op0)
20244 || reg_overlap_mentioned_p (out, op1))
20245 tmp = gen_reg_rtx (mode);
20247 if (mode == DImode)
20248 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20249 else
20250 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20251 flags, compare_op));
20253 else
20255 if (code == GT || code == GE)
20256 code = reverse_condition (code);
20257 else
20259 HOST_WIDE_INT tmp = ct;
20260 ct = cf;
20261 cf = tmp;
20262 diff = ct - cf;
20264 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
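	  /* At this point TMP is 0 when the (possibly adjusted) condition
	     selects CT and -1 when it selects CF; the arithmetic below maps
	     {0, -1} onto {ct, cf} without a branch.  E.g. for diff == 1 a
	     single add of ct yields either ct or ct - 1 == cf.  */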
20267 if (diff == 1)
20270 * cmpl op0,op1
20271 * sbbl dest,dest
20272 * [addl dest, ct]
20274 * Size 5 - 8.
20276 if (ct)
20277 tmp = expand_simple_binop (mode, PLUS,
20278 tmp, GEN_INT (ct),
20279 copy_rtx (tmp), 1, OPTAB_DIRECT);
20281 else if (cf == -1)
20284 * cmpl op0,op1
20285 * sbbl dest,dest
20286 * orl $ct, dest
20288 * Size 8.
20290 tmp = expand_simple_binop (mode, IOR,
20291 tmp, GEN_INT (ct),
20292 copy_rtx (tmp), 1, OPTAB_DIRECT);
20294 else if (diff == -1 && ct)
20297 * cmpl op0,op1
20298 * sbbl dest,dest
20299 * notl dest
20300 * [addl dest, cf]
20302 * Size 8 - 11.
20304 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20305 if (cf)
20306 tmp = expand_simple_binop (mode, PLUS,
20307 copy_rtx (tmp), GEN_INT (cf),
20308 copy_rtx (tmp), 1, OPTAB_DIRECT);
20310 else
20313 * cmpl op0,op1
20314 * sbbl dest,dest
20315 * [notl dest]
20316 * andl cf - ct, dest
20317 * [addl dest, ct]
20319 * Size 8 - 11.
20322 if (cf == 0)
20324 cf = ct;
20325 ct = 0;
20326 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20329 tmp = expand_simple_binop (mode, AND,
20330 copy_rtx (tmp),
20331 gen_int_mode (cf - ct, mode),
20332 copy_rtx (tmp), 1, OPTAB_DIRECT);
20333 if (ct)
20334 tmp = expand_simple_binop (mode, PLUS,
20335 copy_rtx (tmp), GEN_INT (ct),
20336 copy_rtx (tmp), 1, OPTAB_DIRECT);
20339 if (!rtx_equal_p (tmp, out))
20340 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20342 return true;
20345 if (diff < 0)
20347 enum machine_mode cmp_mode = GET_MODE (op0);
20349 HOST_WIDE_INT tmp;
20350 tmp = ct, ct = cf, cf = tmp;
20351 diff = -diff;
20353 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20355 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20357 /* We may be reversing an unordered compare to a normal compare, which
20358 is not valid in general (we may convert a non-trapping condition
20359 to a trapping one); however, on i386 we currently emit all
20360 comparisons unordered. */
20361 compare_code = reverse_condition_maybe_unordered (compare_code);
20362 code = reverse_condition_maybe_unordered (code);
20364 else
20366 compare_code = reverse_condition (compare_code);
20367 code = reverse_condition (code);
20371 compare_code = UNKNOWN;
20372 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20373 && CONST_INT_P (op1))
20375 if (op1 == const0_rtx
20376 && (code == LT || code == GE))
20377 compare_code = code;
20378 else if (op1 == constm1_rtx)
20380 if (code == LE)
20381 compare_code = LT;
20382 else if (code == GT)
20383 compare_code = GE;
20387 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20388 if (compare_code != UNKNOWN
20389 && GET_MODE (op0) == GET_MODE (out)
20390 && (cf == -1 || ct == -1))
20392 /* If lea code below could be used, only optimize
20393 if it results in a 2 insn sequence. */
20395 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20396 || diff == 3 || diff == 5 || diff == 9)
20397 || (compare_code == LT && ct == -1)
20398 || (compare_code == GE && cf == -1))
20401 * notl op1 (if necessary)
20402 * sarl $31, op1
20403 * orl cf, op1
20405 if (ct != -1)
20407 cf = ct;
20408 ct = -1;
20409 code = reverse_condition (code);
20412 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20414 out = expand_simple_binop (mode, IOR,
20415 out, GEN_INT (cf),
20416 out, 1, OPTAB_DIRECT);
20417 if (out != operands[0])
20418 emit_move_insn (operands[0], out);
20420 return true;
20425 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20426 || diff == 3 || diff == 5 || diff == 9)
20427 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20428 && (mode != DImode
20429 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20432 * xorl dest,dest
20433 * cmpl op1,op2
20434 * setcc dest
20435 * lea cf(dest*(ct-cf)),dest
20437 * Size 14.
20439 * This also catches the degenerate setcc-only case.
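	  /* For example, with ct = 5 and cf = 2 (diff = 3), setcc leaves
	     0 or 1 in dest and the lea computes 2 + dest * 3, i.e. either
	     cf or ct, in one instruction.  */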
20442 rtx tmp;
20443 int nops;
20445 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20447 nops = 0;
20448 /* On x86_64 the lea instruction operates on Pmode, so we need
20449 to get the arithmetic done in the proper mode to match. */
20450 if (diff == 1)
20451 tmp = copy_rtx (out);
20452 else
20454 rtx out1;
20455 out1 = copy_rtx (out);
20456 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20457 nops++;
20458 if (diff & 1)
20460 tmp = gen_rtx_PLUS (mode, tmp, out1);
20461 nops++;
20464 if (cf != 0)
20466 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20467 nops++;
20469 if (!rtx_equal_p (tmp, out))
20471 if (nops == 1)
20472 out = force_operand (tmp, copy_rtx (out));
20473 else
20474 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20476 if (!rtx_equal_p (out, operands[0]))
20477 emit_move_insn (operands[0], copy_rtx (out));
20479 return true;
20483 * General case:                  Jumpful:
20484 *   xorl dest,dest                cmpl op1, op2
20485 *   cmpl op1, op2                 movl ct, dest
20486 *   setcc dest                    jcc  1f
20487 *   decl dest                     movl cf, dest
20488 *   andl (cf-ct),dest             1:
20489 *   addl ct,dest
20491 * Size 20.                        Size 14.
20493 * This is reasonably steep, but branch mispredict costs are
20494 * high on modern cpus, so consider failing only if optimizing
20495 * for space.
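	  /* Following the jumpless sketch above: setcc leaves 1/0, the
	     decrement turns that into 0/-1, ANDing with (cf - ct) gives
	     0/(cf - ct), and adding ct produces ct when the condition
	     holds and cf otherwise.  */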
20498 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20499 && BRANCH_COST (optimize_insn_for_speed_p (),
20500 false) >= 2)
20502 if (cf == 0)
20504 enum machine_mode cmp_mode = GET_MODE (op0);
20506 cf = ct;
20507 ct = 0;
20509 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20511 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20513 /* We may be reversing an unordered compare to a normal compare,
20514 which is not valid in general (we may convert a non-trapping
20515 condition to a trapping one); however, on i386 we currently
20516 emit all comparisons unordered. */
20517 code = reverse_condition_maybe_unordered (code);
20519 else
20521 code = reverse_condition (code);
20522 if (compare_code != UNKNOWN)
20523 compare_code = reverse_condition (compare_code);
20527 if (compare_code != UNKNOWN)
20529 /* notl op1 (if needed)
20530 sarl $31, op1
20531 andl (cf-ct), op1
20532 addl ct, op1
20534 For x < 0 (resp. x <= -1) there will be no notl,
20535 so if possible swap the constants to get rid of the
20536 complement.
20537 True/false will be -1/0 while code below (store flag
20538 followed by decrement) is 0/-1, so the constants need
20539 to be exchanged once more. */
20541 if (compare_code == GE || !cf)
20543 code = reverse_condition (code);
20544 compare_code = LT;
20546 else
20548 HOST_WIDE_INT tmp = cf;
20549 cf = ct;
20550 ct = tmp;
20553 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20555 else
20557 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20559 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20560 constm1_rtx,
20561 copy_rtx (out), 1, OPTAB_DIRECT);
20564 out = expand_simple_binop (mode, AND, copy_rtx (out),
20565 gen_int_mode (cf - ct, mode),
20566 copy_rtx (out), 1, OPTAB_DIRECT);
20567 if (ct)
20568 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20569 copy_rtx (out), 1, OPTAB_DIRECT);
20570 if (!rtx_equal_p (out, operands[0]))
20571 emit_move_insn (operands[0], copy_rtx (out));
20573 return true;
20577 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20579 /* Try a few more things with specific constants and a variable. */
20581 optab op;
20582 rtx var, orig_out, out, tmp;
20584 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20585 return false;
20587 /* If one of the two operands is an interesting constant, load a
20588 constant with the above and mask it in with a logical operation. */
20590 if (CONST_INT_P (operands[2]))
20592 var = operands[3];
20593 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20594 operands[3] = constm1_rtx, op = and_optab;
20595 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20596 operands[3] = const0_rtx, op = ior_optab;
20597 else
20598 return false;
20600 else if (CONST_INT_P (operands[3]))
20602 var = operands[2];
20603 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20604 operands[2] = constm1_rtx, op = and_optab;
20605 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20606 operands[2] = const0_rtx, op = ior_optab;
20607 else
20608 return false;
20610 else
20611 return false;
20613 orig_out = operands[0];
20614 tmp = gen_reg_rtx (mode);
20615 operands[0] = tmp;
20617 /* Recurse to get the constant loaded. */
20618 if (ix86_expand_int_movcc (operands) == 0)
20619 return false;
20621 /* Mask in the interesting variable. */
20622 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20623 OPTAB_WIDEN);
20624 if (!rtx_equal_p (out, orig_out))
20625 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20627 return true;
20631 * For comparison with above,
20633 * movl cf,dest
20634 * movl ct,tmp
20635 * cmpl op1,op2
20636 * cmovcc tmp,dest
20638 * Size 15.
20641 if (! nonimmediate_operand (operands[2], mode))
20642 operands[2] = force_reg (mode, operands[2]);
20643 if (! nonimmediate_operand (operands[3], mode))
20644 operands[3] = force_reg (mode, operands[3]);
20646 if (! register_operand (operands[2], VOIDmode)
20647 && (mode == QImode
20648 || ! register_operand (operands[3], VOIDmode)))
20649 operands[2] = force_reg (mode, operands[2]);
20651 if (mode == QImode
20652 && ! register_operand (operands[3], VOIDmode))
20653 operands[3] = force_reg (mode, operands[3]);
20655 emit_insn (compare_seq);
20656 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20657 gen_rtx_IF_THEN_ELSE (mode,
20658 compare_op, operands[2],
20659 operands[3])));
20660 return true;
20663 /* Swap, force into registers, or otherwise massage the two operands
20664 to an sse comparison with a mask result. Thus we differ a bit from
20665 ix86_prepare_fp_compare_args which expects to produce a flags result.
20667 The DEST operand exists to help determine whether to commute commutative
20668 operators. The POP0/POP1 operands are updated in place. The new
20669 comparison code is returned, or UNKNOWN if not implementable. */
20671 static enum rtx_code
20672 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20673 rtx *pop0, rtx *pop1)
20675 rtx tmp;
20677 switch (code)
20679 case LTGT:
20680 case UNEQ:
20681 /* AVX supports all the needed comparisons. */
20682 if (TARGET_AVX)
20683 break;
20684 /* We have no LTGT as an operator. We could implement it with
20685 NE & ORDERED, but this requires an extra temporary. It's
20686 not clear that it's worth it. */
20687 return UNKNOWN;
20689 case LT:
20690 case LE:
20691 case UNGT:
20692 case UNGE:
20693 /* These are supported directly. */
20694 break;
20696 case EQ:
20697 case NE:
20698 case UNORDERED:
20699 case ORDERED:
20700 /* AVX has 3 operand comparisons, no need to swap anything. */
20701 if (TARGET_AVX)
20702 break;
20703 /* For commutative operators, try to canonicalize the destination
20704 operand to be first in the comparison - this helps reload to
20705 avoid extra moves. */
20706 if (!dest || !rtx_equal_p (dest, *pop1))
20707 break;
20708 /* FALLTHRU */
20710 case GE:
20711 case GT:
20712 case UNLE:
20713 case UNLT:
20714 /* These are not supported directly before AVX, and furthermore
20715 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20716 comparison operands to transform into something that is
20717 supported. */
20718 tmp = *pop0;
20719 *pop0 = *pop1;
20720 *pop1 = tmp;
20721 code = swap_condition (code);
20722 break;
20724 default:
20725 gcc_unreachable ();
20728 return code;
20731 /* Detect conditional moves that exactly match min/max operational
20732 semantics. Note that this is IEEE safe, as long as we don't
20733 interchange the operands.
20735 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20736 and TRUE if the operation is successful and instructions are emitted. */
20738 static bool
20739 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20740 rtx cmp_op1, rtx if_true, rtx if_false)
20742 enum machine_mode mode;
20743 bool is_min;
20744 rtx tmp;
20746 if (code == LT)
20748 else if (code == UNGE)
20750 tmp = if_true;
20751 if_true = if_false;
20752 if_false = tmp;
20754 else
20755 return false;
20757 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20758 is_min = true;
20759 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20760 is_min = false;
20761 else
20762 return false;
20764 mode = GET_MODE (dest);
20766 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20767 but MODE may be a vector mode and thus not appropriate. */
20768 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20770 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20771 rtvec v;
20773 if_true = force_reg (mode, if_true);
20774 v = gen_rtvec (2, if_true, if_false);
20775 tmp = gen_rtx_UNSPEC (mode, v, u);
20777 else
20779 code = is_min ? SMIN : SMAX;
20780 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20783 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20784 return true;
20787 /* Expand an sse vector comparison. Return the register with the result. */
20789 static rtx
20790 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20791 rtx op_true, rtx op_false)
20793 enum machine_mode mode = GET_MODE (dest);
20794 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20796 /* In the general case the result of a comparison can differ from the operands' type. */
20797 enum machine_mode cmp_mode;
20799 /* In AVX512F the result of comparison is an integer mask. */
20800 bool maskcmp = false;
20801 rtx x;
20803 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20805 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20806 gcc_assert (cmp_mode != BLKmode);
20808 maskcmp = true;
20810 else
20811 cmp_mode = cmp_ops_mode;
20814 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20815 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20816 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20818 if (optimize
20819 || reg_overlap_mentioned_p (dest, op_true)
20820 || reg_overlap_mentioned_p (dest, op_false))
20821 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20823 /* Compare patterns for int modes are unspec in AVX512F only. */
20824 if (maskcmp && (code == GT || code == EQ))
20826 rtx (*gen)(rtx, rtx, rtx);
20828 switch (cmp_ops_mode)
20830 case V16SImode:
20831 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20832 break;
20833 case V8DImode:
20834 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20835 break;
20836 default:
20837 gen = NULL;
20840 if (gen)
20842 emit_insn (gen (dest, cmp_op0, cmp_op1));
20843 return dest;
20846 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20848 if (cmp_mode != mode && !maskcmp)
20850 x = force_reg (cmp_ops_mode, x);
20851 convert_move (dest, x, false);
20853 else
20854 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20856 return dest;
20859 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20860 operations. This is used for both scalar and vector conditional moves. */
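/* In the most general fallback below (no blend instruction, no mask
   registers, no XOP), the conditional move is emitted as the classic
   bitwise select DEST = (OP_TRUE & CMP) | (OP_FALSE & ~CMP), where CMP is
   the all-ones/all-zeros result of the vector comparison.  */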
20862 static void
20863 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20865 enum machine_mode mode = GET_MODE (dest);
20866 enum machine_mode cmpmode = GET_MODE (cmp);
20868 /* In AVX512F the result of comparison is an integer mask. */
20869 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20871 rtx t2, t3, x;
20873 if (vector_all_ones_operand (op_true, mode)
20874 && rtx_equal_p (op_false, CONST0_RTX (mode))
20875 && !maskcmp)
20877 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20879 else if (op_false == CONST0_RTX (mode)
20880 && !maskcmp)
20882 op_true = force_reg (mode, op_true);
20883 x = gen_rtx_AND (mode, cmp, op_true);
20884 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20886 else if (op_true == CONST0_RTX (mode)
20887 && !maskcmp)
20889 op_false = force_reg (mode, op_false);
20890 x = gen_rtx_NOT (mode, cmp);
20891 x = gen_rtx_AND (mode, x, op_false);
20892 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20894 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20895 && !maskcmp)
20897 op_false = force_reg (mode, op_false);
20898 x = gen_rtx_IOR (mode, cmp, op_false);
20899 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20901 else if (TARGET_XOP
20902 && !maskcmp)
20904 op_true = force_reg (mode, op_true);
20906 if (!nonimmediate_operand (op_false, mode))
20907 op_false = force_reg (mode, op_false);
20909 emit_insn (gen_rtx_SET (mode, dest,
20910 gen_rtx_IF_THEN_ELSE (mode, cmp,
20911 op_true,
20912 op_false)));
20914 else
20916 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20917 rtx d = dest;
20919 if (!nonimmediate_operand (op_true, mode))
20920 op_true = force_reg (mode, op_true);
20922 op_false = force_reg (mode, op_false);
20924 switch (mode)
20926 case V4SFmode:
20927 if (TARGET_SSE4_1)
20928 gen = gen_sse4_1_blendvps;
20929 break;
20930 case V2DFmode:
20931 if (TARGET_SSE4_1)
20932 gen = gen_sse4_1_blendvpd;
20933 break;
20934 case V16QImode:
20935 case V8HImode:
20936 case V4SImode:
20937 case V2DImode:
20938 if (TARGET_SSE4_1)
20940 gen = gen_sse4_1_pblendvb;
20941 if (mode != V16QImode)
20942 d = gen_reg_rtx (V16QImode);
20943 op_false = gen_lowpart (V16QImode, op_false);
20944 op_true = gen_lowpart (V16QImode, op_true);
20945 cmp = gen_lowpart (V16QImode, cmp);
20947 break;
20948 case V8SFmode:
20949 if (TARGET_AVX)
20950 gen = gen_avx_blendvps256;
20951 break;
20952 case V4DFmode:
20953 if (TARGET_AVX)
20954 gen = gen_avx_blendvpd256;
20955 break;
20956 case V32QImode:
20957 case V16HImode:
20958 case V8SImode:
20959 case V4DImode:
20960 if (TARGET_AVX2)
20962 gen = gen_avx2_pblendvb;
20963 if (mode != V32QImode)
20964 d = gen_reg_rtx (V32QImode);
20965 op_false = gen_lowpart (V32QImode, op_false);
20966 op_true = gen_lowpart (V32QImode, op_true);
20967 cmp = gen_lowpart (V32QImode, cmp);
20969 break;
20971 case V16SImode:
20972 gen = gen_avx512f_blendmv16si;
20973 break;
20974 case V8DImode:
20975 gen = gen_avx512f_blendmv8di;
20976 break;
20977 case V8DFmode:
20978 gen = gen_avx512f_blendmv8df;
20979 break;
20980 case V16SFmode:
20981 gen = gen_avx512f_blendmv16sf;
20982 break;
20984 default:
20985 break;
20988 if (gen != NULL)
20990 emit_insn (gen (d, op_false, op_true, cmp));
20991 if (d != dest)
20992 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
20994 else
20996 op_true = force_reg (mode, op_true);
20998 t2 = gen_reg_rtx (mode);
20999 if (optimize)
21000 t3 = gen_reg_rtx (mode);
21001 else
21002 t3 = dest;
21004 x = gen_rtx_AND (mode, op_true, cmp);
21005 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21007 x = gen_rtx_NOT (mode, cmp);
21008 x = gen_rtx_AND (mode, x, op_false);
21009 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21011 x = gen_rtx_IOR (mode, t3, t2);
21012 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21017 /* Expand a floating-point conditional move. Return true if successful. */
21019 bool
21020 ix86_expand_fp_movcc (rtx operands[])
21022 enum machine_mode mode = GET_MODE (operands[0]);
21023 enum rtx_code code = GET_CODE (operands[1]);
21024 rtx tmp, compare_op;
21025 rtx op0 = XEXP (operands[1], 0);
21026 rtx op1 = XEXP (operands[1], 1);
21028 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21030 enum machine_mode cmode;
21032 /* Since we have no cmove for SSE registers, don't force bad register
21033 allocation just to gain access to it. Deny movcc when the
21034 comparison mode doesn't match the move mode. */
21035 cmode = GET_MODE (op0);
21036 if (cmode == VOIDmode)
21037 cmode = GET_MODE (op1);
21038 if (cmode != mode)
21039 return false;
21041 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21042 if (code == UNKNOWN)
21043 return false;
21045 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21046 operands[2], operands[3]))
21047 return true;
21049 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21050 operands[2], operands[3]);
21051 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21052 return true;
21055 if (GET_MODE (op0) == TImode
21056 || (GET_MODE (op0) == DImode
21057 && !TARGET_64BIT))
21058 return false;
21060 /* The floating point conditional move instructions don't directly
21061 support conditions resulting from a signed integer comparison. */
21063 compare_op = ix86_expand_compare (code, op0, op1);
21064 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21066 tmp = gen_reg_rtx (QImode);
21067 ix86_expand_setcc (tmp, code, op0, op1);
21069 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21072 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21073 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21074 operands[2], operands[3])));
21076 return true;
21079 /* Expand a floating-point vector conditional move; a vcond operation
21080 rather than a movcc operation. */
21082 bool
21083 ix86_expand_fp_vcond (rtx operands[])
21085 enum rtx_code code = GET_CODE (operands[3]);
21086 rtx cmp;
21088 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21089 &operands[4], &operands[5]);
21090 if (code == UNKNOWN)
21092 rtx temp;
21093 switch (GET_CODE (operands[3]))
21095 case LTGT:
21096 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21097 operands[5], operands[0], operands[0]);
21098 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21099 operands[5], operands[1], operands[2]);
21100 code = AND;
21101 break;
21102 case UNEQ:
21103 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21104 operands[5], operands[0], operands[0]);
21105 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21106 operands[5], operands[1], operands[2]);
21107 code = IOR;
21108 break;
21109 default:
21110 gcc_unreachable ();
21112 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21113 OPTAB_DIRECT);
21114 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21115 return true;
21118 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21119 operands[5], operands[1], operands[2]))
21120 return true;
21122 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21123 operands[1], operands[2]);
21124 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21125 return true;
21128 /* Expand a signed/unsigned integral vector conditional move. */
21130 bool
21131 ix86_expand_int_vcond (rtx operands[])
21133 enum machine_mode data_mode = GET_MODE (operands[0]);
21134 enum machine_mode mode = GET_MODE (operands[4]);
21135 enum rtx_code code = GET_CODE (operands[3]);
21136 bool negate = false;
21137 rtx x, cop0, cop1;
21139 cop0 = operands[4];
21140 cop1 = operands[5];
21142 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21143 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
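/* When the comparison is GE rather than LT the two arms of the conditional
   are swapped; the operands[1 + (code == LT)] and operands[2 - (code == LT)]
   indexing below accounts for that.  */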
21144 if ((code == LT || code == GE)
21145 && data_mode == mode
21146 && cop1 == CONST0_RTX (mode)
21147 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21148 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21149 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21150 && (GET_MODE_SIZE (data_mode) == 16
21151 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21153 rtx negop = operands[2 - (code == LT)];
21154 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21155 if (negop == CONST1_RTX (data_mode))
21157 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21158 operands[0], 1, OPTAB_DIRECT);
21159 if (res != operands[0])
21160 emit_move_insn (operands[0], res);
21161 return true;
21163 else if (GET_MODE_INNER (data_mode) != DImode
21164 && vector_all_ones_operand (negop, data_mode))
21166 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21167 operands[0], 0, OPTAB_DIRECT);
21168 if (res != operands[0])
21169 emit_move_insn (operands[0], res);
21170 return true;
21174 if (!nonimmediate_operand (cop1, mode))
21175 cop1 = force_reg (mode, cop1);
21176 if (!general_operand (operands[1], data_mode))
21177 operands[1] = force_reg (data_mode, operands[1]);
21178 if (!general_operand (operands[2], data_mode))
21179 operands[2] = force_reg (data_mode, operands[2]);
21181 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21182 if (TARGET_XOP
21183 && (mode == V16QImode || mode == V8HImode
21184 || mode == V4SImode || mode == V2DImode))
21186 else
21188 /* Canonicalize the comparison to EQ, GT, GTU. */
21189 switch (code)
21191 case EQ:
21192 case GT:
21193 case GTU:
21194 break;
21196 case NE:
21197 case LE:
21198 case LEU:
21199 code = reverse_condition (code);
21200 negate = true;
21201 break;
21203 case GE:
21204 case GEU:
21205 code = reverse_condition (code);
21206 negate = true;
21207 /* FALLTHRU */
21209 case LT:
21210 case LTU:
21211 code = swap_condition (code);
21212 x = cop0, cop0 = cop1, cop1 = x;
21213 break;
21215 default:
21216 gcc_unreachable ();
21219 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21220 if (mode == V2DImode)
21222 switch (code)
21224 case EQ:
21225 /* SSE4.1 supports EQ. */
21226 if (!TARGET_SSE4_1)
21227 return false;
21228 break;
21230 case GT:
21231 case GTU:
21232 /* SSE4.2 supports GT/GTU. */
21233 if (!TARGET_SSE4_2)
21234 return false;
21235 break;
21237 default:
21238 gcc_unreachable ();
21242 /* Unsigned parallel compare is not supported by the hardware.
21243 Play some tricks to turn this into a signed comparison
21244 or a comparison against zero. */
21245 if (code == GTU)
21247 cop0 = force_reg (mode, cop0);
21249 switch (mode)
21251 case V16SImode:
21252 case V8DImode:
21253 case V8SImode:
21254 case V4DImode:
21255 case V4SImode:
21256 case V2DImode:
21258 rtx t1, t2, mask;
21259 rtx (*gen_sub3) (rtx, rtx, rtx);
21261 switch (mode)
21263 case V16SImode: gen_sub3 = gen_subv16si3; break;
21264 case V8DImode: gen_sub3 = gen_subv8di3; break;
21265 case V8SImode: gen_sub3 = gen_subv8si3; break;
21266 case V4DImode: gen_sub3 = gen_subv4di3; break;
21267 case V4SImode: gen_sub3 = gen_subv4si3; break;
21268 case V2DImode: gen_sub3 = gen_subv2di3; break;
21269 default:
21270 gcc_unreachable ();
21272 /* Subtract (-(INT MAX) - 1) from both operands to make
21273 them signed. */
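/* For example, with 32-bit elements a >u b is equivalent to
   (a - 0x80000000) >s (b - 0x80000000): subtracting INT_MIN (modulo 2^32)
   simply flips the sign bit of each operand.  */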
21274 mask = ix86_build_signbit_mask (mode, true, false);
21275 t1 = gen_reg_rtx (mode);
21276 emit_insn (gen_sub3 (t1, cop0, mask));
21278 t2 = gen_reg_rtx (mode);
21279 emit_insn (gen_sub3 (t2, cop1, mask));
21281 cop0 = t1;
21282 cop1 = t2;
21283 code = GT;
21285 break;
21287 case V32QImode:
21288 case V16HImode:
21289 case V16QImode:
21290 case V8HImode:
21291 /* Perform a parallel unsigned saturating subtraction. */
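/* a >u b exactly when the saturating difference a -us b is nonzero, so the
   comparison is rewritten as an EQ against zero with NEGATE flipped.  */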
21292 x = gen_reg_rtx (mode);
21293 emit_insn (gen_rtx_SET (VOIDmode, x,
21294 gen_rtx_US_MINUS (mode, cop0, cop1)));
21296 cop0 = x;
21297 cop1 = CONST0_RTX (mode);
21298 code = EQ;
21299 negate = !negate;
21300 break;
21302 default:
21303 gcc_unreachable ();
21308 /* Allow the comparison to be done in one mode, but the movcc to
21309 happen in another mode. */
21310 if (data_mode == mode)
21312 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21313 operands[1+negate], operands[2-negate]);
21315 else
21317 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21318 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21319 operands[1+negate], operands[2-negate]);
21320 if (GET_MODE (x) == mode)
21321 x = gen_lowpart (data_mode, x);
21324 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21325 operands[2-negate]);
21326 return true;
21329 static bool
21330 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21332 enum machine_mode mode = GET_MODE (op0);
21333 switch (mode)
21335 case V16SImode:
21336 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21337 force_reg (V16SImode, mask),
21338 op1));
21339 return true;
21340 case V16SFmode:
21341 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21342 force_reg (V16SImode, mask),
21343 op1));
21344 return true;
21345 case V8DImode:
21346 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21347 force_reg (V8DImode, mask), op1));
21348 return true;
21349 case V8DFmode:
21350 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21351 force_reg (V8DImode, mask), op1));
21352 return true;
21353 default:
21354 return false;
21358 /* Expand a variable vector permutation. */
21360 void
21361 ix86_expand_vec_perm (rtx operands[])
21363 rtx target = operands[0];
21364 rtx op0 = operands[1];
21365 rtx op1 = operands[2];
21366 rtx mask = operands[3];
21367 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21368 enum machine_mode mode = GET_MODE (op0);
21369 enum machine_mode maskmode = GET_MODE (mask);
21370 int w, e, i;
21371 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21373 /* Number of elements in the vector. */
21374 w = GET_MODE_NUNITS (mode);
21375 e = GET_MODE_UNIT_SIZE (mode);
21376 gcc_assert (w <= 64);
21378 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21379 return;
21381 if (TARGET_AVX2)
21383 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21385 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21386 a constant shuffle operand. With a tiny bit of effort we can
21387 use VPERMD instead. A re-interpretation stall for V4DFmode is
21388 unfortunate but there's no avoiding it.
21389 Similarly, for V16HImode we don't have instructions for variable
21390 shuffling, while for V32QImode we can, after preparing suitable
21391 masks, use vpshufb; vpshufb; vpermq; vpor. */
21393 if (mode == V16HImode)
21395 maskmode = mode = V32QImode;
21396 w = 32;
21397 e = 1;
21399 else
21401 maskmode = mode = V8SImode;
21402 w = 8;
21403 e = 4;
21405 t1 = gen_reg_rtx (maskmode);
21407 /* Replicate the low bits of the V4DImode mask into V8SImode:
21408 mask = { A B C D }
21409 t1 = { A A B B C C D D }. */
21410 for (i = 0; i < w / 2; ++i)
21411 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21412 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21413 vt = force_reg (maskmode, vt);
21414 mask = gen_lowpart (maskmode, mask);
21415 if (maskmode == V8SImode)
21416 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21417 else
21418 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21420 /* Multiply the shuffle indices by two. */
21421 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21422 OPTAB_DIRECT);
21424 /* Add one to the odd shuffle indices:
21425 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21426 for (i = 0; i < w / 2; ++i)
21428 vec[i * 2] = const0_rtx;
21429 vec[i * 2 + 1] = const1_rtx;
21431 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21432 vt = validize_mem (force_const_mem (maskmode, vt));
21433 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21434 OPTAB_DIRECT);
21436 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21437 operands[3] = mask = t1;
21438 target = gen_reg_rtx (mode);
21439 op0 = gen_lowpart (mode, op0);
21440 op1 = gen_lowpart (mode, op1);
21443 switch (mode)
21445 case V8SImode:
21446 /* The VPERMD and VPERMPS instructions already properly ignore
21447 the high bits of the shuffle elements. No need for us to
21448 perform an AND ourselves. */
21449 if (one_operand_shuffle)
21451 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21452 if (target != operands[0])
21453 emit_move_insn (operands[0],
21454 gen_lowpart (GET_MODE (operands[0]), target));
21456 else
21458 t1 = gen_reg_rtx (V8SImode);
21459 t2 = gen_reg_rtx (V8SImode);
21460 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21461 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21462 goto merge_two;
21464 return;
21466 case V8SFmode:
21467 mask = gen_lowpart (V8SImode, mask);
21468 if (one_operand_shuffle)
21469 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21470 else
21472 t1 = gen_reg_rtx (V8SFmode);
21473 t2 = gen_reg_rtx (V8SFmode);
21474 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21475 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21476 goto merge_two;
21478 return;
21480 case V4SImode:
21481 /* By combining the two 128-bit input vectors into one 256-bit
21482 input vector, we can use VPERMD and VPERMPS for the full
21483 two-operand shuffle. */
21484 t1 = gen_reg_rtx (V8SImode);
21485 t2 = gen_reg_rtx (V8SImode);
21486 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21487 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21488 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21489 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21490 return;
21492 case V4SFmode:
21493 t1 = gen_reg_rtx (V8SFmode);
21494 t2 = gen_reg_rtx (V8SImode);
21495 mask = gen_lowpart (V4SImode, mask);
21496 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21497 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21498 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21499 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21500 return;
21502 case V32QImode:
21503 t1 = gen_reg_rtx (V32QImode);
21504 t2 = gen_reg_rtx (V32QImode);
21505 t3 = gen_reg_rtx (V32QImode);
21506 vt2 = GEN_INT (128);
21507 for (i = 0; i < 32; i++)
21508 vec[i] = vt2;
21509 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21510 vt = force_reg (V32QImode, vt);
21511 for (i = 0; i < 32; i++)
21512 vec[i] = i < 16 ? vt2 : const0_rtx;
21513 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21514 vt2 = force_reg (V32QImode, vt2);
21515 /* From mask create two adjusted masks, which contain the same
21516 bits as mask in the low 7 bits of each vector element.
21517 The first mask will have the most significant bit clear
21518 if it requests an element from the same 128-bit lane
21519 and MSB set if it requests an element from the other 128-bit lane.
21520 The second mask will have the opposite values of the MSB,
21521 and additionally will have its 128-bit lanes swapped.
21522 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21523 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21524 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21525 stands for the other 12 bytes. */
21526 /* The bit that tells whether an element comes from the same lane or
21527 the other lane is bit 4, so shift it up by 3 to the MSB position. */
21528 t5 = gen_reg_rtx (V4DImode);
21529 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21530 GEN_INT (3)));
21531 /* Clear MSB bits from the mask just in case it had them set. */
21532 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21533 /* After this t1 will have the MSB set for elements from the other lane. */
21534 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21535 /* Clear bits other than MSB. */
21536 emit_insn (gen_andv32qi3 (t1, t1, vt));
21537 /* Or in the lower bits from mask into t3. */
21538 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21539 /* And invert MSB bits in t1, so MSB is set for elements from the same
21540 lane. */
21541 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21542 /* Swap 128-bit lanes in t3. */
21543 t6 = gen_reg_rtx (V4DImode);
21544 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21545 const2_rtx, GEN_INT (3),
21546 const0_rtx, const1_rtx));
21547 /* And or in the lower bits from mask into t1. */
21548 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21549 if (one_operand_shuffle)
21551 /* Each of these shuffles will put 0s in places where an
21552 element from the other 128-bit lane is needed; otherwise it
21553 will shuffle in the requested value. */
21554 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21555 gen_lowpart (V32QImode, t6)));
21556 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21557 /* For t3 the 128-bit lanes are swapped again. */
21558 t7 = gen_reg_rtx (V4DImode);
21559 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21560 const2_rtx, GEN_INT (3),
21561 const0_rtx, const1_rtx));
21562 /* And ORing both together yields the result. */
21563 emit_insn (gen_iorv32qi3 (target, t1,
21564 gen_lowpart (V32QImode, t7)));
21565 if (target != operands[0])
21566 emit_move_insn (operands[0],
21567 gen_lowpart (GET_MODE (operands[0]), target));
21568 return;
21571 t4 = gen_reg_rtx (V32QImode);
21572 /* Similar to the one_operand_shuffle code above, just
21573 repeated twice, once for each operand. The merge_two:
21574 code will then merge the two results together. */
21575 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21576 gen_lowpart (V32QImode, t6)));
21577 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21578 gen_lowpart (V32QImode, t6)));
21579 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21580 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21581 t7 = gen_reg_rtx (V4DImode);
21582 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21583 const2_rtx, GEN_INT (3),
21584 const0_rtx, const1_rtx));
21585 t8 = gen_reg_rtx (V4DImode);
21586 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21587 const2_rtx, GEN_INT (3),
21588 const0_rtx, const1_rtx));
21589 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21590 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21591 t1 = t4;
21592 t2 = t3;
21593 goto merge_two;
21595 default:
21596 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21597 break;
21601 if (TARGET_XOP)
21603 /* The XOP VPPERM insn supports three inputs. By ignoring the
21604 one_operand_shuffle special case, we avoid creating another
21605 set of constant vectors in memory. */
21606 one_operand_shuffle = false;
21608 /* mask = mask & {2*w-1, ...} */
21609 vt = GEN_INT (2*w - 1);
21611 else
21613 /* mask = mask & {w-1, ...} */
21614 vt = GEN_INT (w - 1);
21617 for (i = 0; i < w; i++)
21618 vec[i] = vt;
21619 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21620 mask = expand_simple_binop (maskmode, AND, mask, vt,
21621 NULL_RTX, 0, OPTAB_DIRECT);
21623 /* For non-QImode operations, convert the word permutation control
21624 into a byte permutation control. */
21625 if (mode != V16QImode)
21627 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21628 GEN_INT (exact_log2 (e)),
21629 NULL_RTX, 0, OPTAB_DIRECT);
21631 /* Convert mask to vector of chars. */
21632 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21634 /* Replicate each of the input bytes into byte positions:
21635 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21636 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21637 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21638 for (i = 0; i < 16; ++i)
21639 vec[i] = GEN_INT (i/e * e);
21640 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21641 vt = validize_mem (force_const_mem (V16QImode, vt));
21642 if (TARGET_XOP)
21643 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21644 else
21645 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21647 /* Convert it into the byte positions by doing
21648 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21649 for (i = 0; i < 16; ++i)
21650 vec[i] = GEN_INT (i % e);
21651 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21652 vt = validize_mem (force_const_mem (V16QImode, vt));
21653 emit_insn (gen_addv16qi3 (mask, mask, vt));
21656 /* The actual shuffle operations all operate on V16QImode. */
21657 op0 = gen_lowpart (V16QImode, op0);
21658 op1 = gen_lowpart (V16QImode, op1);
21660 if (TARGET_XOP)
21662 if (GET_MODE (target) != V16QImode)
21663 target = gen_reg_rtx (V16QImode);
21664 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21665 if (target != operands[0])
21666 emit_move_insn (operands[0],
21667 gen_lowpart (GET_MODE (operands[0]), target));
21669 else if (one_operand_shuffle)
21671 if (GET_MODE (target) != V16QImode)
21672 target = gen_reg_rtx (V16QImode);
21673 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21674 if (target != operands[0])
21675 emit_move_insn (operands[0],
21676 gen_lowpart (GET_MODE (operands[0]), target));
21678 else
21680 rtx xops[6];
21681 bool ok;
21683 /* Shuffle the two input vectors independently. */
21684 t1 = gen_reg_rtx (V16QImode);
21685 t2 = gen_reg_rtx (V16QImode);
21686 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21687 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21689 merge_two:
21690 /* Then merge them together. The key is whether any given control
21691 element contained a bit set that indicates the second word. */
21692 mask = operands[3];
21693 vt = GEN_INT (w);
21694 if (maskmode == V2DImode && !TARGET_SSE4_1)
21696 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21697 more shuffle to convert the V2DI input mask into a V4SI
21698 input mask. At that point the masking that expand_int_vcond
21699 performs will work as desired. */
21700 rtx t3 = gen_reg_rtx (V4SImode);
21701 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21702 const0_rtx, const0_rtx,
21703 const2_rtx, const2_rtx));
21704 mask = t3;
21705 maskmode = V4SImode;
21706 e = w = 4;
21709 for (i = 0; i < w; i++)
21710 vec[i] = vt;
21711 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21712 vt = force_reg (maskmode, vt);
21713 mask = expand_simple_binop (maskmode, AND, mask, vt,
21714 NULL_RTX, 0, OPTAB_DIRECT);
21716 if (GET_MODE (target) != mode)
21717 target = gen_reg_rtx (mode);
21718 xops[0] = target;
21719 xops[1] = gen_lowpart (mode, t2);
21720 xops[2] = gen_lowpart (mode, t1);
21721 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21722 xops[4] = mask;
21723 xops[5] = vt;
21724 ok = ix86_expand_int_vcond (xops);
21725 gcc_assert (ok);
21726 if (target != operands[0])
21727 emit_move_insn (operands[0],
21728 gen_lowpart (GET_MODE (operands[0]), target));
21732 /* Unpack SRC into the next wider integer vector type. UNSIGNED_P is
21733 true if we should do zero extension, else sign extension. HIGH_P is
21734 true if we want the N/2 high elements, else the low elements. */
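/* With SSE4.1 and above this uses the packed sign/zero-extension patterns on
   the selected half of SRC; without SSE4.1 it falls back to interleaving SRC
   with either a zero vector (zero extension) or a sign mask obtained by
   comparing 0 > SRC (sign extension).  */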
21736 void
21737 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21739 enum machine_mode imode = GET_MODE (src);
21740 rtx tmp;
21742 if (TARGET_SSE4_1)
21744 rtx (*unpack)(rtx, rtx);
21745 rtx (*extract)(rtx, rtx) = NULL;
21746 enum machine_mode halfmode = BLKmode;
21748 switch (imode)
21750 case V32QImode:
21751 if (unsigned_p)
21752 unpack = gen_avx2_zero_extendv16qiv16hi2;
21753 else
21754 unpack = gen_avx2_sign_extendv16qiv16hi2;
21755 halfmode = V16QImode;
21756 extract
21757 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21758 break;
21759 case V32HImode:
21760 if (unsigned_p)
21761 unpack = gen_avx512f_zero_extendv16hiv16si2;
21762 else
21763 unpack = gen_avx512f_sign_extendv16hiv16si2;
21764 halfmode = V16HImode;
21765 extract
21766 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21767 break;
21768 case V16HImode:
21769 if (unsigned_p)
21770 unpack = gen_avx2_zero_extendv8hiv8si2;
21771 else
21772 unpack = gen_avx2_sign_extendv8hiv8si2;
21773 halfmode = V8HImode;
21774 extract
21775 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21776 break;
21777 case V16SImode:
21778 if (unsigned_p)
21779 unpack = gen_avx512f_zero_extendv8siv8di2;
21780 else
21781 unpack = gen_avx512f_sign_extendv8siv8di2;
21782 halfmode = V8SImode;
21783 extract
21784 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21785 break;
21786 case V8SImode:
21787 if (unsigned_p)
21788 unpack = gen_avx2_zero_extendv4siv4di2;
21789 else
21790 unpack = gen_avx2_sign_extendv4siv4di2;
21791 halfmode = V4SImode;
21792 extract
21793 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21794 break;
21795 case V16QImode:
21796 if (unsigned_p)
21797 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21798 else
21799 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21800 break;
21801 case V8HImode:
21802 if (unsigned_p)
21803 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21804 else
21805 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21806 break;
21807 case V4SImode:
21808 if (unsigned_p)
21809 unpack = gen_sse4_1_zero_extendv2siv2di2;
21810 else
21811 unpack = gen_sse4_1_sign_extendv2siv2di2;
21812 break;
21813 default:
21814 gcc_unreachable ();
21817 if (GET_MODE_SIZE (imode) >= 32)
21819 tmp = gen_reg_rtx (halfmode);
21820 emit_insn (extract (tmp, src));
21822 else if (high_p)
21824 /* Shift higher 8 bytes to lower 8 bytes. */
21825 tmp = gen_reg_rtx (V1TImode);
21826 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21827 GEN_INT (64)));
21828 tmp = gen_lowpart (imode, tmp);
21830 else
21831 tmp = src;
21833 emit_insn (unpack (dest, tmp));
21835 else
21837 rtx (*unpack)(rtx, rtx, rtx);
21839 switch (imode)
21841 case V16QImode:
21842 if (high_p)
21843 unpack = gen_vec_interleave_highv16qi;
21844 else
21845 unpack = gen_vec_interleave_lowv16qi;
21846 break;
21847 case V8HImode:
21848 if (high_p)
21849 unpack = gen_vec_interleave_highv8hi;
21850 else
21851 unpack = gen_vec_interleave_lowv8hi;
21852 break;
21853 case V4SImode:
21854 if (high_p)
21855 unpack = gen_vec_interleave_highv4si;
21856 else
21857 unpack = gen_vec_interleave_lowv4si;
21858 break;
21859 default:
21860 gcc_unreachable ();
21863 if (unsigned_p)
21864 tmp = force_reg (imode, CONST0_RTX (imode));
21865 else
21866 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21867 src, pc_rtx, pc_rtx);
21869 rtx tmp2 = gen_reg_rtx (imode);
21870 emit_insn (unpack (tmp2, src, tmp));
21871 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21875 /* Expand conditional increment or decrement using adc/sbb instructions.
21876 The default case using setcc followed by the conditional move can be
21877 done by generic code. */
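/* As an illustration (not tied to any particular caller), an unsigned
   comparison feeding a conditional increment such as x + (a < b) can be
   emitted as cmp a, b followed by adc x, 0; the decrement forms use sbb.  */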
21878 bool
21879 ix86_expand_int_addcc (rtx operands[])
21881 enum rtx_code code = GET_CODE (operands[1]);
21882 rtx flags;
21883 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21884 rtx compare_op;
21885 rtx val = const0_rtx;
21886 bool fpcmp = false;
21887 enum machine_mode mode;
21888 rtx op0 = XEXP (operands[1], 0);
21889 rtx op1 = XEXP (operands[1], 1);
21891 if (operands[3] != const1_rtx
21892 && operands[3] != constm1_rtx)
21893 return false;
21894 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21895 return false;
21896 code = GET_CODE (compare_op);
21898 flags = XEXP (compare_op, 0);
21900 if (GET_MODE (flags) == CCFPmode
21901 || GET_MODE (flags) == CCFPUmode)
21903 fpcmp = true;
21904 code = ix86_fp_compare_code_to_integer (code);
21907 if (code != LTU)
21909 val = constm1_rtx;
21910 if (fpcmp)
21911 PUT_CODE (compare_op,
21912 reverse_condition_maybe_unordered
21913 (GET_CODE (compare_op)));
21914 else
21915 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21918 mode = GET_MODE (operands[0]);
21920 /* Construct either adc or sbb insn. */
21921 if ((code == LTU) == (operands[3] == constm1_rtx))
21923 switch (mode)
21925 case QImode:
21926 insn = gen_subqi3_carry;
21927 break;
21928 case HImode:
21929 insn = gen_subhi3_carry;
21930 break;
21931 case SImode:
21932 insn = gen_subsi3_carry;
21933 break;
21934 case DImode:
21935 insn = gen_subdi3_carry;
21936 break;
21937 default:
21938 gcc_unreachable ();
21941 else
21943 switch (mode)
21945 case QImode:
21946 insn = gen_addqi3_carry;
21947 break;
21948 case HImode:
21949 insn = gen_addhi3_carry;
21950 break;
21951 case SImode:
21952 insn = gen_addsi3_carry;
21953 break;
21954 case DImode:
21955 insn = gen_adddi3_carry;
21956 break;
21957 default:
21958 gcc_unreachable ();
21961 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
21963 return true;
21967 /* Split OPERAND into half-mode parts. Similar to split_double_mode,
21968 but works for floating-point parameters and non-offsettable memories.
21969 For pushes, it returns just stack offsets; the values will be saved
21970 in the right order. At most four parts are generated. */
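/* For example, on a 32-bit target a DFmode operand is split into two SImode
   parts, an XFmode operand into three and a TFmode operand into four; on a
   64-bit target an XFmode or TFmode operand is split into two parts.  */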
21972 static int
21973 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
21975 int size;
21977 if (!TARGET_64BIT)
21978 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
21979 else
21980 size = (GET_MODE_SIZE (mode) + 4) / 8;
21982 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
21983 gcc_assert (size >= 2 && size <= 4);
21985 /* Optimize constant pool reference to immediates. This is used by fp
21986 moves, which force all constants to memory to allow combining. */
21987 if (MEM_P (operand) && MEM_READONLY_P (operand))
21989 rtx tmp = maybe_get_pool_constant (operand);
21990 if (tmp)
21991 operand = tmp;
21994 if (MEM_P (operand) && !offsettable_memref_p (operand))
21996 /* The only non-offsettable memories we handle are pushes. */
21997 int ok = push_operand (operand, VOIDmode);
21999 gcc_assert (ok);
22001 operand = copy_rtx (operand);
22002 PUT_MODE (operand, word_mode);
22003 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22004 return size;
22007 if (GET_CODE (operand) == CONST_VECTOR)
22009 enum machine_mode imode = int_mode_for_mode (mode);
22010 /* Caution: if we looked through a constant pool memory above,
22011 the operand may actually have a different mode now. That's
22012 ok, since we want to pun this all the way back to an integer. */
22013 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22014 gcc_assert (operand != NULL);
22015 mode = imode;
22018 if (!TARGET_64BIT)
22020 if (mode == DImode)
22021 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22022 else
22024 int i;
22026 if (REG_P (operand))
22028 gcc_assert (reload_completed);
22029 for (i = 0; i < size; i++)
22030 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22032 else if (offsettable_memref_p (operand))
22034 operand = adjust_address (operand, SImode, 0);
22035 parts[0] = operand;
22036 for (i = 1; i < size; i++)
22037 parts[i] = adjust_address (operand, SImode, 4 * i);
22039 else if (GET_CODE (operand) == CONST_DOUBLE)
22041 REAL_VALUE_TYPE r;
22042 long l[4];
22044 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22045 switch (mode)
22047 case TFmode:
22048 real_to_target (l, &r, mode);
22049 parts[3] = gen_int_mode (l[3], SImode);
22050 parts[2] = gen_int_mode (l[2], SImode);
22051 break;
22052 case XFmode:
22053 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22054 long double may not be 80-bit. */
22055 real_to_target (l, &r, mode);
22056 parts[2] = gen_int_mode (l[2], SImode);
22057 break;
22058 case DFmode:
22059 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22060 break;
22061 default:
22062 gcc_unreachable ();
22064 parts[1] = gen_int_mode (l[1], SImode);
22065 parts[0] = gen_int_mode (l[0], SImode);
22067 else
22068 gcc_unreachable ();
22071 else
22073 if (mode == TImode)
22074 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22075 if (mode == XFmode || mode == TFmode)
22077 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22078 if (REG_P (operand))
22080 gcc_assert (reload_completed);
22081 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22082 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22084 else if (offsettable_memref_p (operand))
22086 operand = adjust_address (operand, DImode, 0);
22087 parts[0] = operand;
22088 parts[1] = adjust_address (operand, upper_mode, 8);
22090 else if (GET_CODE (operand) == CONST_DOUBLE)
22092 REAL_VALUE_TYPE r;
22093 long l[4];
22095 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22096 real_to_target (l, &r, mode);
22098 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22099 if (HOST_BITS_PER_WIDE_INT >= 64)
22100 parts[0]
22101 = gen_int_mode
22102 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22103 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22104 DImode);
22105 else
22106 parts[0] = immed_double_const (l[0], l[1], DImode);
22108 if (upper_mode == SImode)
22109 parts[1] = gen_int_mode (l[2], SImode);
22110 else if (HOST_BITS_PER_WIDE_INT >= 64)
22111 parts[1]
22112 = gen_int_mode
22113 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22114 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22115 DImode);
22116 else
22117 parts[1] = immed_double_const (l[2], l[3], DImode);
22119 else
22120 gcc_unreachable ();
22124 return size;
22127 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22128 All required insns are emitted here. Operands 2 onwards are filled with
22129 the destination parts in the correct order; operands 6 onwards with
22130 the corresponding source parts. */
22132 void
22133 ix86_split_long_move (rtx operands[])
22135 rtx part[2][4];
22136 int nparts, i, j;
22137 int push = 0;
22138 int collisions = 0;
22139 enum machine_mode mode = GET_MODE (operands[0]);
22140 bool collisionparts[4];
22142 /* The DFmode expanders may ask us to move a double.
22143 For a 64-bit target this is a single move. By hiding the fact
22144 here we simplify the i386.md splitters. */
22145 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22147 /* Optimize constant pool reference to immediates. This is used by
22148 fp moves, which force all constants to memory to allow combining. */
22150 if (MEM_P (operands[1])
22151 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22152 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22153 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22154 if (push_operand (operands[0], VOIDmode))
22156 operands[0] = copy_rtx (operands[0]);
22157 PUT_MODE (operands[0], word_mode);
22159 else
22160 operands[0] = gen_lowpart (DImode, operands[0]);
22161 operands[1] = gen_lowpart (DImode, operands[1]);
22162 emit_move_insn (operands[0], operands[1]);
22163 return;
22166 /* The only non-offsettable memory we handle is push. */
22167 if (push_operand (operands[0], VOIDmode))
22168 push = 1;
22169 else
22170 gcc_assert (!MEM_P (operands[0])
22171 || offsettable_memref_p (operands[0]));
22173 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22174 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22176 /* When emitting a push, be careful with source operands on the stack. */
22177 if (push && MEM_P (operands[1])
22178 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22180 rtx src_base = XEXP (part[1][nparts - 1], 0);
22182 /* Compensate for the stack decrement by 4. */
22183 if (!TARGET_64BIT && nparts == 3
22184 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22185 src_base = plus_constant (Pmode, src_base, 4);
22187 /* src_base refers to the stack pointer and is
22188 automatically decreased by emitted push. */
22189 for (i = 0; i < nparts; i++)
22190 part[1][i] = change_address (part[1][i],
22191 GET_MODE (part[1][i]), src_base);
22194 /* We need to do the copy in the right order in case an address register
22195 of the source overlaps the destination. */
22196 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22198 rtx tmp;
22200 for (i = 0; i < nparts; i++)
22202 collisionparts[i]
22203 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22204 if (collisionparts[i])
22205 collisions++;
22208 /* Collision in the middle part can be handled by reordering. */
22209 if (collisions == 1 && nparts == 3 && collisionparts [1])
22211 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22212 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22214 else if (collisions == 1
22215 && nparts == 4
22216 && (collisionparts [1] || collisionparts [2]))
22218 if (collisionparts [1])
22220 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22221 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22223 else
22225 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22226 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22230 /* If there are more collisions, we can't handle it by reordering.
22231 Do an lea to the last part and use only one colliding move. */
22232 else if (collisions > 1)
22234 rtx base;
22236 collisions = 1;
22238 base = part[0][nparts - 1];
22240 /* Handle the case when the last part isn't valid for lea.
22241 Happens in 64-bit mode storing the 12-byte XFmode. */
22242 if (GET_MODE (base) != Pmode)
22243 base = gen_rtx_REG (Pmode, REGNO (base));
22245 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22246 part[1][0] = replace_equiv_address (part[1][0], base);
22247 for (i = 1; i < nparts; i++)
22249 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22250 part[1][i] = replace_equiv_address (part[1][i], tmp);
22255 if (push)
22257 if (!TARGET_64BIT)
22259 if (nparts == 3)
22261 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22262 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22263 stack_pointer_rtx, GEN_INT (-4)));
22264 emit_move_insn (part[0][2], part[1][2]);
22266 else if (nparts == 4)
22268 emit_move_insn (part[0][3], part[1][3]);
22269 emit_move_insn (part[0][2], part[1][2]);
22272 else
22274 /* In 64-bit mode we don't have a 32-bit push available. In case this is
22275 a register, that is OK - we will just use the larger counterpart. We also
22276 retype memory - this comes from an attempt to avoid the REX prefix on
22277 moving the second half of a TFmode value. */
22278 if (GET_MODE (part[1][1]) == SImode)
22280 switch (GET_CODE (part[1][1]))
22282 case MEM:
22283 part[1][1] = adjust_address (part[1][1], DImode, 0);
22284 break;
22286 case REG:
22287 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22288 break;
22290 default:
22291 gcc_unreachable ();
22294 if (GET_MODE (part[1][0]) == SImode)
22295 part[1][0] = part[1][1];
22298 emit_move_insn (part[0][1], part[1][1]);
22299 emit_move_insn (part[0][0], part[1][0]);
22300 return;
22303 /* Choose correct order to not overwrite the source before it is copied. */
22304 if ((REG_P (part[0][0])
22305 && REG_P (part[1][1])
22306 && (REGNO (part[0][0]) == REGNO (part[1][1])
22307 || (nparts == 3
22308 && REGNO (part[0][0]) == REGNO (part[1][2]))
22309 || (nparts == 4
22310 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22311 || (collisions > 0
22312 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22314 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22316 operands[2 + i] = part[0][j];
22317 operands[6 + i] = part[1][j];
22320 else
22322 for (i = 0; i < nparts; i++)
22324 operands[2 + i] = part[0][i];
22325 operands[6 + i] = part[1][i];
22329 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22330 if (optimize_insn_for_size_p ())
22332 for (j = 0; j < nparts - 1; j++)
22333 if (CONST_INT_P (operands[6 + j])
22334 && operands[6 + j] != const0_rtx
22335 && REG_P (operands[2 + j]))
22336 for (i = j; i < nparts - 1; i++)
22337 if (CONST_INT_P (operands[7 + i])
22338 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22339 operands[7 + i] = operands[2 + j];
22342 for (i = 0; i < nparts; i++)
22343 emit_move_insn (operands[2 + i], operands[6 + i]);
22345 return;
22348 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22349 left shift by a constant, either using a single shift or
22350 a sequence of add instructions. */
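/* Note that MODE is the mode of the original double-word operand, so the
   half-mode add/shift patterns are used on OPERAND, which is one of its
   halves.  A count of 1 is emitted as repeated OPERAND = OPERAND + OPERAND;
   so are larger counts when the chain of adds is cheaper than a constant
   shift and we are not optimizing for size.  Otherwise a single
   shift-by-immediate is emitted.  */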
22352 static void
22353 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22355 rtx (*insn)(rtx, rtx, rtx);
22357 if (count == 1
22358 || (count * ix86_cost->add <= ix86_cost->shift_const
22359 && !optimize_insn_for_size_p ()))
22361 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22362 while (count-- > 0)
22363 emit_insn (insn (operand, operand, operand));
22365 else
22367 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22368 emit_insn (insn (operand, operand, GEN_INT (count)));
22372 void
22373 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22375 rtx (*gen_ashl3)(rtx, rtx, rtx);
22376 rtx (*gen_shld)(rtx, rtx, rtx);
22377 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22379 rtx low[2], high[2];
22380 int count;
22382 if (CONST_INT_P (operands[2]))
22384 split_double_mode (mode, operands, 2, low, high);
22385 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22387 if (count >= half_width)
22389 emit_move_insn (high[0], low[1]);
22390 emit_move_insn (low[0], const0_rtx);
22392 if (count > half_width)
22393 ix86_expand_ashl_const (high[0], count - half_width, mode);
22395 else
22397 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22399 if (!rtx_equal_p (operands[0], operands[1]))
22400 emit_move_insn (operands[0], operands[1]);
22402 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22403 ix86_expand_ashl_const (low[0], count, mode);
22405 return;
22408 split_double_mode (mode, operands, 1, low, high);
22410 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22412 if (operands[1] == const1_rtx)
22414 /* Assuming we've chosen QImode-capable registers, 1 << N
22415 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22416 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22418 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22420 ix86_expand_clear (low[0]);
22421 ix86_expand_clear (high[0]);
22422 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22424 d = gen_lowpart (QImode, low[0]);
22425 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22426 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22427 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22429 d = gen_lowpart (QImode, high[0]);
22430 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22431 s = gen_rtx_NE (QImode, flags, const0_rtx);
22432 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22435 /* Otherwise, we can get the same results by manually performing
22436 a bit extract operation on bit 5/6, and then performing the two
22437 shifts. The two methods of getting 0/1 into low/high are exactly
22438 the same size. Avoiding the shift in the bit extract case helps
22439 pentium4 a bit; no one else seems to care much either way. */
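/* Concretely, for DImode 1 << N on a 32-bit target the code below computes
   high = (N >> 5) & 1 and low = high ^ 1, then shifts both 32-bit halves
   left by N, relying on the hardware shift using only the low 5 bits of the
   count (6 bits in the 64-bit TImode case).  */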
22440 else
22442 enum machine_mode half_mode;
22443 rtx (*gen_lshr3)(rtx, rtx, rtx);
22444 rtx (*gen_and3)(rtx, rtx, rtx);
22445 rtx (*gen_xor3)(rtx, rtx, rtx);
22446 HOST_WIDE_INT bits;
22447 rtx x;
22449 if (mode == DImode)
22451 half_mode = SImode;
22452 gen_lshr3 = gen_lshrsi3;
22453 gen_and3 = gen_andsi3;
22454 gen_xor3 = gen_xorsi3;
22455 bits = 5;
22457 else
22459 half_mode = DImode;
22460 gen_lshr3 = gen_lshrdi3;
22461 gen_and3 = gen_anddi3;
22462 gen_xor3 = gen_xordi3;
22463 bits = 6;
22466 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22467 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22468 else
22469 x = gen_lowpart (half_mode, operands[2]);
22470 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22472 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22473 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22474 emit_move_insn (low[0], high[0]);
22475 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22478 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22479 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22480 return;
22483 if (operands[1] == constm1_rtx)
22485 /* For -1 << N, we can avoid the shld instruction, because we
22486 know that we're shifting 0...31/63 ones into a -1. */
22487 emit_move_insn (low[0], constm1_rtx);
22488 if (optimize_insn_for_size_p ())
22489 emit_move_insn (high[0], low[0]);
22490 else
22491 emit_move_insn (high[0], constm1_rtx);
22493 else
22495 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22497 if (!rtx_equal_p (operands[0], operands[1]))
22498 emit_move_insn (operands[0], operands[1]);
22500 split_double_mode (mode, operands, 1, low, high);
22501 emit_insn (gen_shld (high[0], low[0], operands[2]));
22504 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22506 if (TARGET_CMOVE && scratch)
22508 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22509 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22511 ix86_expand_clear (scratch);
22512 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22514 else
22516 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22517 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22519 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22523 void
22524 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22526 rtx (*gen_ashr3)(rtx, rtx, rtx)
22527 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22528 rtx (*gen_shrd)(rtx, rtx, rtx);
22529 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22531 rtx low[2], high[2];
22532 int count;
22534 if (CONST_INT_P (operands[2]))
22536 split_double_mode (mode, operands, 2, low, high);
22537 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22539 if (count == GET_MODE_BITSIZE (mode) - 1)
22541 emit_move_insn (high[0], high[1]);
22542 emit_insn (gen_ashr3 (high[0], high[0],
22543 GEN_INT (half_width - 1)));
22544 emit_move_insn (low[0], high[0]);
22547 else if (count >= half_width)
22549 emit_move_insn (low[0], high[1]);
22550 emit_move_insn (high[0], low[0]);
22551 emit_insn (gen_ashr3 (high[0], high[0],
22552 GEN_INT (half_width - 1)));
22554 if (count > half_width)
22555 emit_insn (gen_ashr3 (low[0], low[0],
22556 GEN_INT (count - half_width)));
22558 else
22560 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22562 if (!rtx_equal_p (operands[0], operands[1]))
22563 emit_move_insn (operands[0], operands[1]);
22565 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22566 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22569 else
22571 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22573 if (!rtx_equal_p (operands[0], operands[1]))
22574 emit_move_insn (operands[0], operands[1]);
22576 split_double_mode (mode, operands, 1, low, high);
22578 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22579 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22581 if (TARGET_CMOVE && scratch)
22583 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22584 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22586 emit_move_insn (scratch, high[0]);
22587 emit_insn (gen_ashr3 (scratch, scratch,
22588 GEN_INT (half_width - 1)));
22589 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22590 scratch));
22592 else
22594 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22595 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22597 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22602 void
22603 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22605 rtx (*gen_lshr3)(rtx, rtx, rtx)
22606 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22607 rtx (*gen_shrd)(rtx, rtx, rtx);
22608 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22610 rtx low[2], high[2];
22611 int count;
22613 if (CONST_INT_P (operands[2]))
22615 split_double_mode (mode, operands, 2, low, high);
22616 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22618 if (count >= half_width)
22620 emit_move_insn (low[0], high[1]);
22621 ix86_expand_clear (high[0]);
22623 if (count > half_width)
22624 emit_insn (gen_lshr3 (low[0], low[0],
22625 GEN_INT (count - half_width)));
22627 else
22629 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22631 if (!rtx_equal_p (operands[0], operands[1]))
22632 emit_move_insn (operands[0], operands[1]);
22634 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22635 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22638 else
22640 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22642 if (!rtx_equal_p (operands[0], operands[1]))
22643 emit_move_insn (operands[0], operands[1]);
22645 split_double_mode (mode, operands, 1, low, high);
22647 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22648 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22650 if (TARGET_CMOVE && scratch)
22652 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22653 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22655 ix86_expand_clear (scratch);
22656 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22657 scratch));
22659 else
22661 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22662 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22664 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22669 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
22670 static void
22671 predict_jump (int prob)
22673 rtx insn = get_last_insn ();
22674 gcc_assert (JUMP_P (insn));
22675 add_int_reg_note (insn, REG_BR_PROB, prob);
22678 /* Helper function for the string operations below. Test whether VARIABLE
22679 is aligned to VALUE bytes; if so, jump to the returned label. */
22680 static rtx
22681 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22683 rtx label = gen_label_rtx ();
22684 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22685 if (GET_MODE (variable) == DImode)
22686 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22687 else
22688 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22689 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22690 1, label);
22691 if (epilogue)
22692 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22693 else
22694 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22695 return label;
22698 /* Decrease COUNTREG by VALUE. */
22699 static void
22700 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22702 rtx (*gen_add)(rtx, rtx, rtx)
22703 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22705 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22708 /* Zero extend possibly SImode EXP to Pmode register. */
22710 ix86_zero_extend_to_Pmode (rtx exp)
22712 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22715 /* Divide COUNTREG by SCALE. */
22716 static rtx
22717 scale_counter (rtx countreg, int scale)
22719 rtx sc;
22721 if (scale == 1)
22722 return countreg;
22723 if (CONST_INT_P (countreg))
22724 return GEN_INT (INTVAL (countreg) / scale);
22725 gcc_assert (REG_P (countreg));
22727 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22728 GEN_INT (exact_log2 (scale)),
22729 NULL, 1, OPTAB_DIRECT);
22730 return sc;
22733 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22734 DImode for constant loop counts. */
22736 static enum machine_mode
22737 counter_mode (rtx count_exp)
22739 if (GET_MODE (count_exp) != VOIDmode)
22740 return GET_MODE (count_exp);
22741 if (!CONST_INT_P (count_exp))
22742 return Pmode;
22743 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22744 return DImode;
22745 return SImode;
22748 /* Copy the address to a Pmode register. This is used for x32 to
22749 truncate DImode TLS address to a SImode register. */
22751 static rtx
22752 ix86_copy_addr_to_reg (rtx addr)
22754 if (GET_MODE (addr) == Pmode)
22755 return copy_addr_to_reg (addr);
22756 else
22758 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22759 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22763 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by SRCPTR
22764 to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size is COUNT,
22765 specified in bytes.  When ISSETMEM is TRUE, output the equivalent loop to set
22766 memory to VALUE (supposed to be in MODE).
22768 The size is rounded down to a whole number of chunks moved at once.
22769 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info.  */
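/* Rough illustration: for COUNT == 37, MODE == SImode and UNROLL == 2 the
   loop works in 8-byte chunks, so the size is masked down to 32 and four
   iterations are executed, leaving the remaining 5 bytes to the caller's
   epilogue code.  */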
22772 static void
22773 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22774 rtx destptr, rtx srcptr, rtx value,
22775 rtx count, enum machine_mode mode, int unroll,
22776 int expected_size, bool issetmem)
22778 rtx out_label, top_label, iter, tmp;
22779 enum machine_mode iter_mode = counter_mode (count);
22780 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22781 rtx piece_size = GEN_INT (piece_size_n);
22782 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22783 rtx size;
22784 int i;
22786 top_label = gen_label_rtx ();
22787 out_label = gen_label_rtx ();
22788 iter = gen_reg_rtx (iter_mode);
22790 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22791 NULL, 1, OPTAB_DIRECT);
22792 /* Those two should combine. */
22793 if (piece_size == const1_rtx)
22795 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22796 true, out_label);
22797 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22799 emit_move_insn (iter, const0_rtx);
22801 emit_label (top_label);
22803 tmp = convert_modes (Pmode, iter_mode, iter, true);
22805 /* This assert could be relaxed - in this case we'll need to compute
22806 the smallest power of two containing PIECE_SIZE_N and pass it to
22807 offset_address.  */
22808 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22809 destmem = offset_address (destmem, tmp, piece_size_n);
22810 destmem = adjust_address (destmem, mode, 0);
22812 if (!issetmem)
22814 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22815 srcmem = adjust_address (srcmem, mode, 0);
22817 /* When unrolling for chips that reorder memory reads and writes,
22818 we can save registers by using a single temporary.
22819 Also using 4 temporaries is overkill in 32bit mode. */
22820 if (!TARGET_64BIT && 0)
22822 for (i = 0; i < unroll; i++)
22824 if (i)
22826 destmem =
22827 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22828 srcmem =
22829 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22831 emit_move_insn (destmem, srcmem);
22834 else
22836 rtx tmpreg[4];
22837 gcc_assert (unroll <= 4);
22838 for (i = 0; i < unroll; i++)
22840 tmpreg[i] = gen_reg_rtx (mode);
22841 if (i)
22843 srcmem =
22844 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22846 emit_move_insn (tmpreg[i], srcmem);
22848 for (i = 0; i < unroll; i++)
22850 if (i)
22852 destmem =
22853 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22855 emit_move_insn (destmem, tmpreg[i]);
22859 else
22860 for (i = 0; i < unroll; i++)
22862 if (i)
22863 destmem =
22864 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22865 emit_move_insn (destmem, value);
22868 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22869 true, OPTAB_LIB_WIDEN);
22870 if (tmp != iter)
22871 emit_move_insn (iter, tmp);
22873 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22874 true, top_label);
22875 if (expected_size != -1)
22877 expected_size /= GET_MODE_SIZE (mode) * unroll;
22878 if (expected_size == 0)
22879 predict_jump (0);
22880 else if (expected_size > REG_BR_PROB_BASE)
22881 predict_jump (REG_BR_PROB_BASE - 1);
22882 else
22883 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22885 else
22886 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22887 iter = ix86_zero_extend_to_Pmode (iter);
22888 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22889 true, OPTAB_LIB_WIDEN);
22890 if (tmp != destptr)
22891 emit_move_insn (destptr, tmp);
22892 if (!issetmem)
22894 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22895 true, OPTAB_LIB_WIDEN);
22896 if (tmp != srcptr)
22897 emit_move_insn (srcptr, tmp);
22899 emit_label (out_label);
22902 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22903 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22904 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22905 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22906 ORIG_VALUE is the original value passed to memset to fill the memory with.
22907 Other arguments have same meaning as for previous function. */
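/* E.g. a memcpy with a constant COUNT that is a multiple of 4, or a memset
   of zero, is widened from QImode to SImode here, so a rep movsl/stosl with
   COUNT / 4 iterations is emitted instead of the byte variant.  */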
22909 static void
22910 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22911 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22912 rtx count,
22913 enum machine_mode mode, bool issetmem)
22915 rtx destexp;
22916 rtx srcexp;
22917 rtx countreg;
22918 HOST_WIDE_INT rounded_count;
22920 /* If possible, it is shorter to use rep movs.
22921 TODO: Maybe it is better to move this logic to decide_alg. */
22922 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22923 && (!issetmem || orig_value == const0_rtx))
22924 mode = SImode;
22926 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22927 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22929 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22930 GET_MODE_SIZE (mode)));
22931 if (mode != QImode)
22933 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22934 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22935 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22937 else
22938 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22939 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22941 rounded_count = (INTVAL (count)
22942 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22943 destmem = shallow_copy_rtx (destmem);
22944 set_mem_size (destmem, rounded_count);
22946 else if (MEM_SIZE_KNOWN_P (destmem))
22947 clear_mem_size (destmem);
22949 if (issetmem)
22951 value = force_reg (mode, gen_lowpart (mode, value));
22952 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22954 else
22956 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22957 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22958 if (mode != QImode)
22960 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
22961 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22962 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
22964 else
22965 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
22966 if (CONST_INT_P (count))
22968 rounded_count = (INTVAL (count)
22969 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22970 srcmem = shallow_copy_rtx (srcmem);
22971 set_mem_size (srcmem, rounded_count);
22973 else
22975 if (MEM_SIZE_KNOWN_P (srcmem))
22976 clear_mem_size (srcmem);
22978 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
22979 destexp, srcexp));
22983 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
22984 DESTMEM.
22985 SRCMEM is passed by pointer so it can be updated on return.
22986 The return value is the updated DESTMEM.  */
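/* Sketch of the mode selection below: for SIZE_TO_MOVE == 16 with a 64-bit
   word_mode, the 16-byte integer mode is tried first, then the matching
   2 x DImode vector mode, and finally plain word_mode (two 8-byte moves)
   if no 16-byte move pattern is available; every move goes through a
   temporary register since both operands are memory.  */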
22987 static rtx
22988 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
22989 HOST_WIDE_INT size_to_move)
22991 rtx dst = destmem, src = *srcmem, adjust, tempreg;
22992 enum insn_code code;
22993 enum machine_mode move_mode;
22994 int piece_size, i;
22996 /* Find the widest mode in which we could perform moves.
22997 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
22998 it until move of such size is supported. */
22999 piece_size = 1 << floor_log2 (size_to_move);
23000 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23001 code = optab_handler (mov_optab, move_mode);
23002 while (code == CODE_FOR_nothing && piece_size > 1)
23004 piece_size >>= 1;
23005 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23006 code = optab_handler (mov_optab, move_mode);
23009 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23010 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23011 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23013 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23014 move_mode = mode_for_vector (word_mode, nunits);
23015 code = optab_handler (mov_optab, move_mode);
23016 if (code == CODE_FOR_nothing)
23018 move_mode = word_mode;
23019 piece_size = GET_MODE_SIZE (move_mode);
23020 code = optab_handler (mov_optab, move_mode);
23023 gcc_assert (code != CODE_FOR_nothing);
23025 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23026 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23028 /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
23029 gcc_assert (size_to_move % piece_size == 0);
23030 adjust = GEN_INT (piece_size);
23031 for (i = 0; i < size_to_move; i += piece_size)
23033 /* We move from memory to memory, so we'll need to do it via
23034 a temporary register. */
23035 tempreg = gen_reg_rtx (move_mode);
23036 emit_insn (GEN_FCN (code) (tempreg, src));
23037 emit_insn (GEN_FCN (code) (dst, tempreg));
23039 emit_move_insn (destptr,
23040 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23041 emit_move_insn (srcptr,
23042 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23044 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23045 piece_size);
23046 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23047 piece_size);
23050 /* Update DST and SRC rtx. */
23051 *srcmem = src;
23052 return dst;
23055 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
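/* E.g. for a constant COUNT of 13 with MAX_SIZE == 16 the loop over powers
   of two below emits an 8-byte, a 4-byte and a 1-byte move (13 == 8 + 4 + 1).  */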
23056 static void
23057 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23058 rtx destptr, rtx srcptr, rtx count, int max_size)
23060 rtx src, dest;
23061 if (CONST_INT_P (count))
23063 HOST_WIDE_INT countval = INTVAL (count);
23064 HOST_WIDE_INT epilogue_size = countval % max_size;
23065 int i;
23067 /* For now MAX_SIZE should be a power of 2. This assert could be
23068 relaxed, but it'll require a bit more complicated epilogue
23069 expanding. */
23070 gcc_assert ((max_size & (max_size - 1)) == 0);
23071 for (i = max_size; i >= 1; i >>= 1)
23073 if (epilogue_size & i)
23074 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23076 return;
23078 if (max_size > 8)
23080 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23081 count, 1, OPTAB_DIRECT);
23082 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23083 count, QImode, 1, 4, false);
23084 return;
23087 /* When there are stringops, we can cheaply increase dest and src pointers.
23088 Otherwise we save code size by maintaining an offset (zero is readily
23089 available from the preceding rep operation) and using x86 addressing modes.  */
23091 if (TARGET_SINGLE_STRINGOP)
23093 if (max_size > 4)
23095 rtx label = ix86_expand_aligntest (count, 4, true);
23096 src = change_address (srcmem, SImode, srcptr);
23097 dest = change_address (destmem, SImode, destptr);
23098 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23099 emit_label (label);
23100 LABEL_NUSES (label) = 1;
23102 if (max_size > 2)
23104 rtx label = ix86_expand_aligntest (count, 2, true);
23105 src = change_address (srcmem, HImode, srcptr);
23106 dest = change_address (destmem, HImode, destptr);
23107 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23108 emit_label (label);
23109 LABEL_NUSES (label) = 1;
23111 if (max_size > 1)
23113 rtx label = ix86_expand_aligntest (count, 1, true);
23114 src = change_address (srcmem, QImode, srcptr);
23115 dest = change_address (destmem, QImode, destptr);
23116 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23117 emit_label (label);
23118 LABEL_NUSES (label) = 1;
23121 else
23123 rtx offset = force_reg (Pmode, const0_rtx);
23124 rtx tmp;
23126 if (max_size > 4)
23128 rtx label = ix86_expand_aligntest (count, 4, true);
23129 src = change_address (srcmem, SImode, srcptr);
23130 dest = change_address (destmem, SImode, destptr);
23131 emit_move_insn (dest, src);
23132 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23133 true, OPTAB_LIB_WIDEN);
23134 if (tmp != offset)
23135 emit_move_insn (offset, tmp);
23136 emit_label (label);
23137 LABEL_NUSES (label) = 1;
23139 if (max_size > 2)
23141 rtx label = ix86_expand_aligntest (count, 2, true);
23142 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23143 src = change_address (srcmem, HImode, tmp);
23144 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23145 dest = change_address (destmem, HImode, tmp);
23146 emit_move_insn (dest, src);
23147 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23148 true, OPTAB_LIB_WIDEN);
23149 if (tmp != offset)
23150 emit_move_insn (offset, tmp);
23151 emit_label (label);
23152 LABEL_NUSES (label) = 1;
23154 if (max_size > 1)
23156 rtx label = ix86_expand_aligntest (count, 1, true);
23157 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23158 src = change_address (srcmem, QImode, tmp);
23159 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23160 dest = change_address (destmem, QImode, tmp);
23161 emit_move_insn (dest, src);
23162 emit_label (label);
23163 LABEL_NUSES (label) = 1;
23168 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23169 with value PROMOTED_VAL.
23170 Unlike emit_memmov above, there is no source memory to update.
23171 The return value is the updated DESTMEM.  */
23172 static rtx
23173 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23174 HOST_WIDE_INT size_to_move)
23176 rtx dst = destmem, adjust;
23177 enum insn_code code;
23178 enum machine_mode move_mode;
23179 int piece_size, i;
23181 /* Find the widest mode in which we could perform moves.
23182 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23183 it until move of such size is supported. */
23184 move_mode = GET_MODE (promoted_val);
23185 if (move_mode == VOIDmode)
23186 move_mode = QImode;
23187 if (size_to_move < GET_MODE_SIZE (move_mode))
23189 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23190 promoted_val = gen_lowpart (move_mode, promoted_val);
23192 piece_size = GET_MODE_SIZE (move_mode);
23193 code = optab_handler (mov_optab, move_mode);
23194 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23196 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23198 /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
23199 gcc_assert (size_to_move % piece_size == 0);
23200 adjust = GEN_INT (piece_size);
23201 for (i = 0; i < size_to_move; i += piece_size)
23203 if (piece_size <= GET_MODE_SIZE (word_mode))
23205 emit_insn (gen_strset (destptr, dst, promoted_val));
23206 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23207 piece_size);
23208 continue;
23211 emit_insn (GEN_FCN (code) (dst, promoted_val));
23213 emit_move_insn (destptr,
23214 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23216 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23217 piece_size);
23220 /* Update DST rtx. */
23221 return dst;
23223 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
23224 static void
23225 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23226 rtx count, int max_size)
23228 count =
23229 expand_simple_binop (counter_mode (count), AND, count,
23230 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23231 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23232 gen_lowpart (QImode, value), count, QImode,
23233 1, max_size / 2, true);
23236 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
23237 static void
23238 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23239 rtx count, int max_size)
23241 rtx dest;
23243 if (CONST_INT_P (count))
23245 HOST_WIDE_INT countval = INTVAL (count);
23246 HOST_WIDE_INT epilogue_size = countval % max_size;
23247 int i;
23249 /* For now MAX_SIZE should be a power of 2. This assert could be
23250 relaxed, but it'll require a bit more complicated epilogue
23251 expanding. */
23252 gcc_assert ((max_size & (max_size - 1)) == 0);
23253 for (i = max_size; i >= 1; i >>= 1)
23255 if (epilogue_size & i)
23257 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23258 destmem = emit_memset (destmem, destptr, vec_value, i);
23259 else
23260 destmem = emit_memset (destmem, destptr, value, i);
23263 return;
23265 if (max_size > 32)
23267 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23268 return;
23270 if (max_size > 16)
23272 rtx label = ix86_expand_aligntest (count, 16, true);
23273 if (TARGET_64BIT)
23275 dest = change_address (destmem, DImode, destptr);
23276 emit_insn (gen_strset (destptr, dest, value));
23277 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23278 emit_insn (gen_strset (destptr, dest, value));
23280 else
23282 dest = change_address (destmem, SImode, destptr);
23283 emit_insn (gen_strset (destptr, dest, value));
23284 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23285 emit_insn (gen_strset (destptr, dest, value));
23286 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23287 emit_insn (gen_strset (destptr, dest, value));
23288 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23289 emit_insn (gen_strset (destptr, dest, value));
23291 emit_label (label);
23292 LABEL_NUSES (label) = 1;
23294 if (max_size > 8)
23296 rtx label = ix86_expand_aligntest (count, 8, true);
23297 if (TARGET_64BIT)
23299 dest = change_address (destmem, DImode, destptr);
23300 emit_insn (gen_strset (destptr, dest, value));
23302 else
23304 dest = change_address (destmem, SImode, destptr);
23305 emit_insn (gen_strset (destptr, dest, value));
23306 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23307 emit_insn (gen_strset (destptr, dest, value));
23309 emit_label (label);
23310 LABEL_NUSES (label) = 1;
23312 if (max_size > 4)
23314 rtx label = ix86_expand_aligntest (count, 4, true);
23315 dest = change_address (destmem, SImode, destptr);
23316 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23317 emit_label (label);
23318 LABEL_NUSES (label) = 1;
23320 if (max_size > 2)
23322 rtx label = ix86_expand_aligntest (count, 2, true);
23323 dest = change_address (destmem, HImode, destptr);
23324 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23325 emit_label (label);
23326 LABEL_NUSES (label) = 1;
23328 if (max_size > 1)
23330 rtx label = ix86_expand_aligntest (count, 1, true);
23331 dest = change_address (destmem, QImode, destptr);
23332 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23333 emit_label (label);
23334 LABEL_NUSES (label) = 1;
23338 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or store enough
23339 bytes into DESTMEM, to align it to DESIRED_ALIGNMENT.  Original alignment is ALIGN.
23340 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23341 ignored.
23342 Return value is updated DESTMEM. */
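/* E.g. with ALIGN == 1 and DESIRED_ALIGNMENT == 8 this emits three guarded
   blocks handling 1, 2 and 4 bytes respectively; each block is skipped when
   the corresponding DESTPTR bit is already clear, and COUNT is adjusted as
   bytes are consumed.  */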
23343 static rtx
23344 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23345 rtx destptr, rtx srcptr, rtx value,
23346 rtx vec_value, rtx count, int align,
23347 int desired_alignment, bool issetmem)
23349 int i;
23350 for (i = 1; i < desired_alignment; i <<= 1)
23352 if (align <= i)
23354 rtx label = ix86_expand_aligntest (destptr, i, false);
23355 if (issetmem)
23357 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23358 destmem = emit_memset (destmem, destptr, vec_value, i);
23359 else
23360 destmem = emit_memset (destmem, destptr, value, i);
23362 else
23363 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23364 ix86_adjust_counter (count, i);
23365 emit_label (label);
23366 LABEL_NUSES (label) = 1;
23367 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23370 return destmem;
23373 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23374 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23375 and jump to DONE_LABEL. */
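/* E.g. with SIZE == 4 and COUNT == 6 this copies bytes 0..3 and bytes 2..5;
   the overlap is harmless and any length in the SIZE..2*SIZE-1 range is
   covered by the two moves.  */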
23376 static void
23377 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23378 rtx destptr, rtx srcptr,
23379 rtx value, rtx vec_value,
23380 rtx count, int size,
23381 rtx done_label, bool issetmem)
23383 rtx label = ix86_expand_aligntest (count, size, false);
23384 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23385 rtx modesize;
23386 int n;
23388 /* If we do not have vector value to copy, we must reduce size. */
23389 if (issetmem)
23391 if (!vec_value)
23393 if (GET_MODE (value) == VOIDmode && size > 8)
23394 mode = Pmode;
23395 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23396 mode = GET_MODE (value);
23398 else
23399 mode = GET_MODE (vec_value), value = vec_value;
23401 else
23403 /* Choose appropriate vector mode. */
23404 if (size >= 32)
23405 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23406 else if (size >= 16)
23407 mode = TARGET_SSE ? V16QImode : DImode;
23408 srcmem = change_address (srcmem, mode, srcptr);
23410 destmem = change_address (destmem, mode, destptr);
23411 modesize = GEN_INT (GET_MODE_SIZE (mode));
23412 gcc_assert (GET_MODE_SIZE (mode) <= size);
23413 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23415 if (issetmem)
23416 emit_move_insn (destmem, gen_lowpart (mode, value));
23417 else
23419 emit_move_insn (destmem, srcmem);
23420 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23422 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23425 destmem = offset_address (destmem, count, 1);
23426 destmem = offset_address (destmem, GEN_INT (-2 * size),
23427 GET_MODE_SIZE (mode));
23428 if (!issetmem)
23430 srcmem = offset_address (srcmem, count, 1);
23431 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23432 GET_MODE_SIZE (mode));
23434 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23436 if (issetmem)
23437 emit_move_insn (destmem, gen_lowpart (mode, value));
23438 else
23440 emit_move_insn (destmem, srcmem);
23441 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23443 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23445 emit_jump_insn (gen_jump (done_label));
23446 emit_barrier ();
23448 emit_label (label);
23449 LABEL_NUSES (label) = 1;
23452 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
23453 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23454 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
23455 proceed with a loop copying SIZE bytes at once.  Do moves in MODE.
23456 DONE_LABEL is a label after the whole copying sequence.  The label is created
23457 on demand if *DONE_LABEL is NULL.
23458 MIN_SIZE is the minimal size of the block copied.  This value gets adjusted for new
23459 bounds after the initial copies.
23461 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23462 DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates whether
23463 we will dispatch to a library call for large blocks.
23465 In pseudocode we do:
23467 if (COUNT < SIZE)
23469 Assume that SIZE is 4. Bigger sizes are handled analogously
23470 if (COUNT & 4)
23472 copy 4 bytes from SRCPTR to DESTPTR
23473 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23474 goto done_label
23476 if (!COUNT)
23477 goto done_label;
23478 copy 1 byte from SRCPTR to DESTPTR
23479 if (COUNT & 2)
23481 copy 2 bytes from SRCPTR to DESTPTR
23482 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23485 else
23487 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23488 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
23490 OLD_DESTPTR = DESTPTR;
23491 Align DESTPTR up to DESIRED_ALIGN
23492 SRCPTR += DESTPTR - OLD_DESTPTR
23493 COUNT -= DESTPTR - OLD_DESTPTR
23494 if (DYNAMIC_CHECK)
23495 Round COUNT down to multiple of SIZE
23496 << optional caller supplied zero size guard is here >>
23497 << optional caller supplied dynamic check is here >>
23498 << caller supplied main copy loop is here >>
23500 done_label:  */
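/* Rough example for SIZE == 16, DESIRED_ALIGN == 16, ALIGN == 1 and a block
   of 40 bytes: one possibly misaligned 16-byte move covers the head, another
   covers the tail at COUNT - 16, DESTPTR is then rounded up to a 16-byte
   boundary and SRCPTR/COUNT are adjusted by the bytes skipped, so the main
   loop only has to process whole aligned chunks.  */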
23502 static void
23503 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23504 rtx *destptr, rtx *srcptr,
23505 enum machine_mode mode,
23506 rtx value, rtx vec_value,
23507 rtx *count,
23508 rtx *done_label,
23509 int size,
23510 int desired_align,
23511 int align,
23512 unsigned HOST_WIDE_INT *min_size,
23513 bool dynamic_check,
23514 bool issetmem)
23516 rtx loop_label = NULL, label;
23517 int n;
23518 rtx modesize;
23519 int prolog_size = 0;
23520 rtx mode_value;
23522 /* Choose the proper value to copy.  */
23523 if (issetmem && VECTOR_MODE_P (mode))
23524 mode_value = vec_value;
23525 else
23526 mode_value = value;
23527 gcc_assert (GET_MODE_SIZE (mode) <= size);
23529 /* See if block is big or small, handle small blocks. */
23530 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23532 int size2 = size;
23533 loop_label = gen_label_rtx ();
23535 if (!*done_label)
23536 *done_label = gen_label_rtx ();
23538 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23539 1, loop_label);
23540 size2 >>= 1;
23542 /* Handle sizes > 3. */
23543 for (;size2 > 2; size2 >>= 1)
23544 expand_small_movmem_or_setmem (destmem, srcmem,
23545 *destptr, *srcptr,
23546 value, vec_value,
23547 *count,
23548 size2, *done_label, issetmem);
23549 /* Nothing to copy? Jump to DONE_LABEL if so */
23550 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23551 1, *done_label);
23553 /* Do a byte copy. */
23554 destmem = change_address (destmem, QImode, *destptr);
23555 if (issetmem)
23556 emit_move_insn (destmem, gen_lowpart (QImode, value));
23557 else
23559 srcmem = change_address (srcmem, QImode, *srcptr);
23560 emit_move_insn (destmem, srcmem);
23563 /* Handle sizes 2 and 3. */
23564 label = ix86_expand_aligntest (*count, 2, false);
23565 destmem = change_address (destmem, HImode, *destptr);
23566 destmem = offset_address (destmem, *count, 1);
23567 destmem = offset_address (destmem, GEN_INT (-2), 2);
23568 if (issetmem)
23569 emit_move_insn (destmem, gen_lowpart (HImode, value));
23570 else
23572 srcmem = change_address (srcmem, HImode, *srcptr);
23573 srcmem = offset_address (srcmem, *count, 1);
23574 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23575 emit_move_insn (destmem, srcmem);
23578 emit_label (label);
23579 LABEL_NUSES (label) = 1;
23580 emit_jump_insn (gen_jump (*done_label));
23581 emit_barrier ();
23583 else
23584 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23585 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23587 /* Start memcpy for COUNT >= SIZE. */
23588 if (loop_label)
23590 emit_label (loop_label);
23591 LABEL_NUSES (loop_label) = 1;
23594 /* Copy first desired_align bytes. */
23595 if (!issetmem)
23596 srcmem = change_address (srcmem, mode, *srcptr);
23597 destmem = change_address (destmem, mode, *destptr);
23598 modesize = GEN_INT (GET_MODE_SIZE (mode));
23599 for (n = 0; prolog_size < desired_align - align; n++)
23601 if (issetmem)
23602 emit_move_insn (destmem, mode_value);
23603 else
23605 emit_move_insn (destmem, srcmem);
23606 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23608 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23609 prolog_size += GET_MODE_SIZE (mode);
23613 /* Copy last SIZE bytes. */
23614 destmem = offset_address (destmem, *count, 1);
23615 destmem = offset_address (destmem,
23616 GEN_INT (-size - prolog_size),
23618 if (issetmem)
23619 emit_move_insn (destmem, mode_value);
23620 else
23622 srcmem = offset_address (srcmem, *count, 1);
23623 srcmem = offset_address (srcmem,
23624 GEN_INT (-size - prolog_size),
23626 emit_move_insn (destmem, srcmem);
23628 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23630 destmem = offset_address (destmem, modesize, 1);
23631 if (issetmem)
23632 emit_move_insn (destmem, mode_value);
23633 else
23635 srcmem = offset_address (srcmem, modesize, 1);
23636 emit_move_insn (destmem, srcmem);
23640 /* Align destination. */
23641 if (desired_align > 1 && desired_align > align)
23643 rtx saveddest = *destptr;
23645 gcc_assert (desired_align <= size);
23646 /* Align destptr up, place it to new register. */
23647 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23648 GEN_INT (prolog_size),
23649 NULL_RTX, 1, OPTAB_DIRECT);
23650 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23651 GEN_INT (-desired_align),
23652 *destptr, 1, OPTAB_DIRECT);
23653 /* See how many bytes we skipped. */
23654 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23655 *destptr,
23656 saveddest, 1, OPTAB_DIRECT);
23657 /* Adjust srcptr and count. */
23658 if (!issetmem)
23659 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23660 *srcptr, 1, OPTAB_DIRECT);
23661 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23662 saveddest, *count, 1, OPTAB_DIRECT);
23663 /* We copied at most size + prolog_size. */
23664 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23665 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23666 else
23667 *min_size = 0;
23669 /* Our loops always round down the block size, but for dispatch to a library
23670 call we need the precise value.  */
23671 if (dynamic_check)
23672 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23673 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23675 else
23677 gcc_assert (prolog_size == 0);
23678 /* Decrease count, so we won't end up copying last word twice. */
23679 if (!CONST_INT_P (*count))
23680 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23681 constm1_rtx, *count, 1, OPTAB_DIRECT);
23682 else
23683 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23684 if (*min_size)
23685 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23690 /* This function is like the previous one, except here we know how many bytes
23691 need to be copied. That allows us to update alignment not only of DST, which
23692 is returned, but also of SRC, which is passed as a pointer for that
23693 reason. */
23694 static rtx
23695 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23696 rtx srcreg, rtx value, rtx vec_value,
23697 int desired_align, int align_bytes,
23698 bool issetmem)
23700 rtx src = NULL;
23701 rtx orig_dst = dst;
23702 rtx orig_src = NULL;
23703 int piece_size = 1;
23704 int copied_bytes = 0;
23706 if (!issetmem)
23708 gcc_assert (srcp != NULL);
23709 src = *srcp;
23710 orig_src = src;
23713 for (piece_size = 1;
23714 piece_size <= desired_align && copied_bytes < align_bytes;
23715 piece_size <<= 1)
23717 if (align_bytes & piece_size)
23719 if (issetmem)
23721 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23722 dst = emit_memset (dst, destreg, vec_value, piece_size);
23723 else
23724 dst = emit_memset (dst, destreg, value, piece_size);
23726 else
23727 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23728 copied_bytes += piece_size;
23731 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23732 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23733 if (MEM_SIZE_KNOWN_P (orig_dst))
23734 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23736 if (!issetmem)
23738 int src_align_bytes = get_mem_align_offset (src, desired_align
23739 * BITS_PER_UNIT);
23740 if (src_align_bytes >= 0)
23741 src_align_bytes = desired_align - src_align_bytes;
23742 if (src_align_bytes >= 0)
23744 unsigned int src_align;
23745 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23747 if ((src_align_bytes & (src_align - 1))
23748 == (align_bytes & (src_align - 1)))
23749 break;
23751 if (src_align > (unsigned int) desired_align)
23752 src_align = desired_align;
23753 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23754 set_mem_align (src, src_align * BITS_PER_UNIT);
23756 if (MEM_SIZE_KNOWN_P (orig_src))
23757 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23758 *srcp = src;
23761 return dst;
23764 /* Return true if ALG can be used in current context.
23765 Assume we expand memset if MEMSET is true. */
23766 static bool
23767 alg_usable_p (enum stringop_alg alg, bool memset)
23769 if (alg == no_stringop)
23770 return false;
23771 if (alg == vector_loop)
23772 return TARGET_SSE || TARGET_AVX;
23773 /* Algorithms using the rep prefix want at least edi and ecx;
23774 additionally, memset wants eax and memcpy wants esi. Don't
23775 consider such algorithms if the user has appropriated those
23776 registers for their own purposes. */
23777 if (alg == rep_prefix_1_byte
23778 || alg == rep_prefix_4_byte
23779 || alg == rep_prefix_8_byte)
23780 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23781 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23782 return true;
23785 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
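/* E.g. when optimizing for size and no -mstringop-strategy override is in
   effect, a memcpy of a constant 64 bytes picks rep_prefix_4_byte below,
   while a 63-byte copy (or a memset with a nonzero value) falls back to
   rep_prefix_1_byte.  */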
23786 static enum stringop_alg
23787 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23788 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23789 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23791 const struct stringop_algs * algs;
23792 bool optimize_for_speed;
23793 int max = -1;
23794 const struct processor_costs *cost;
23795 int i;
23796 bool any_alg_usable_p = false;
23798 *noalign = false;
23799 *dynamic_check = -1;
23801 /* Even if the string operation call is cold, we still might spend a lot
23802 of time processing large blocks. */
23803 if (optimize_function_for_size_p (cfun)
23804 || (optimize_insn_for_size_p ()
23805 && (max_size < 256
23806 || (expected_size != -1 && expected_size < 256))))
23807 optimize_for_speed = false;
23808 else
23809 optimize_for_speed = true;
23811 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23812 if (memset)
23813 algs = &cost->memset[TARGET_64BIT != 0];
23814 else
23815 algs = &cost->memcpy[TARGET_64BIT != 0];
23817 /* See maximal size for user defined algorithm. */
23818 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23820 enum stringop_alg candidate = algs->size[i].alg;
23821 bool usable = alg_usable_p (candidate, memset);
23822 any_alg_usable_p |= usable;
23824 if (candidate != libcall && candidate && usable)
23825 max = algs->size[i].max;
23828 /* If the expected size is not known but the max size is small enough
23829 so that the inline version is a win, set the expected size into
23830 the range.  */
23831 if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
23832 && expected_size == -1)
23833 expected_size = min_size / 2 + max_size / 2;
23835 /* If the user specified the algorithm, honor it if possible.  */
23836 if (ix86_stringop_alg != no_stringop
23837 && alg_usable_p (ix86_stringop_alg, memset))
23838 return ix86_stringop_alg;
23839 /* rep; movq or rep; movl is the smallest variant. */
23840 else if (!optimize_for_speed)
23842 *noalign = true;
23843 if (!count || (count & 3) || (memset && !zero_memset))
23844 return alg_usable_p (rep_prefix_1_byte, memset)
23845 ? rep_prefix_1_byte : loop_1_byte;
23846 else
23847 return alg_usable_p (rep_prefix_4_byte, memset)
23848 ? rep_prefix_4_byte : loop;
23850 /* Very tiny blocks are best handled via the loop; REP is expensive to
23851 set up.  */
23852 else if (expected_size != -1 && expected_size < 4)
23853 return loop_1_byte;
23854 else if (expected_size != -1)
23856 enum stringop_alg alg = libcall;
23857 bool alg_noalign = false;
23858 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23860 /* We get here if the algorithms that were not libcall-based
23861 were rep-prefix based and we are unable to use rep prefixes
23862 based on global register usage. Break out of the loop and
23863 use the heuristic below. */
23864 if (algs->size[i].max == 0)
23865 break;
23866 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23868 enum stringop_alg candidate = algs->size[i].alg;
23870 if (candidate != libcall && alg_usable_p (candidate, memset))
23872 alg = candidate;
23873 alg_noalign = algs->size[i].noalign;
23875 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23876 last non-libcall inline algorithm. */
23877 if (TARGET_INLINE_ALL_STRINGOPS)
23879 /* When the current size is best to be copied by a libcall,
23880 but we are still forced to inline, run the heuristic below
23881 that will pick code for medium sized blocks. */
23882 if (alg != libcall)
23884 *noalign = alg_noalign;
23885 return alg;
23887 break;
23889 else if (alg_usable_p (candidate, memset))
23891 *noalign = algs->size[i].noalign;
23892 return candidate;
23897 /* When asked to inline the call anyway, try to pick a meaningful choice.
23898 We look for the maximal size of block that is faster to copy by hand and
23899 take blocks of at most that size, guessing that the average size will
23900 be roughly half of the block.
23902 If this turns out to be bad, we might simply specify the preferred
23903 choice in ix86_costs. */
23904 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23905 && (algs->unknown_size == libcall
23906 || !alg_usable_p (algs->unknown_size, memset)))
23908 enum stringop_alg alg;
23910 /* If there aren't any usable algorithms, then recursing on
23911 smaller sizes isn't going to find anything. Just return the
23912 simple byte-at-a-time copy loop. */
23913 if (!any_alg_usable_p)
23915 /* Pick something reasonable. */
23916 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23917 *dynamic_check = 128;
23918 return loop_1_byte;
23920 if (max == -1)
23921 max = 4096;
23922 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23923 zero_memset, dynamic_check, noalign);
23924 gcc_assert (*dynamic_check == -1);
23925 gcc_assert (alg != libcall);
23926 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23927 *dynamic_check = max;
23928 return alg;
23930 return (alg_usable_p (algs->unknown_size, memset)
23931 ? algs->unknown_size : libcall);
23934 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23935 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23936 static int
23937 decide_alignment (int align,
23938 enum stringop_alg alg,
23939 int expected_size,
23940 enum machine_mode move_mode)
23942 int desired_align = 0;
23944 gcc_assert (alg != no_stringop);
23946 if (alg == libcall)
23947 return 0;
23948 if (move_mode == VOIDmode)
23949 return 0;
23951 desired_align = GET_MODE_SIZE (move_mode);
23952 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
23953 copying a whole cache line at once.  */
23954 if (TARGET_PENTIUMPRO
23955 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23956 desired_align = 8;
23958 if (optimize_size)
23959 desired_align = 1;
23960 if (desired_align < align)
23961 desired_align = align;
23962 if (expected_size != -1 && expected_size < 4)
23963 desired_align = align;
23965 return desired_align;
23969 /* Helper function for memcpy. For QImode value 0xXY produce
23970 0xXYXYXYXY of the width specified by MODE.  This is essentially
23971 a * 0x10101010, but we can do slightly better than
23972 synth_mult by unwinding the sequence by hand on CPUs with
23973 slow multiply. */
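/* E.g. for a constant 0xAB the code below computes 0xAB -> 0xABAB ->
   0xABABABAB (and 0xABABABABABABABAB for DImode) via the v |= v << 8 and
   v |= v << 16 steps; for non-constant values the same duplication is done
   with shifts and IORs (or byte inserts), or with a multiply when that is
   cheaper.  */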
23974 static rtx
23975 promote_duplicated_reg (enum machine_mode mode, rtx val)
23977 enum machine_mode valmode = GET_MODE (val);
23978 rtx tmp;
23979 int nops = mode == DImode ? 3 : 2;
23981 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
23982 if (val == const0_rtx)
23983 return copy_to_mode_reg (mode, CONST0_RTX (mode));
23984 if (CONST_INT_P (val))
23986 HOST_WIDE_INT v = INTVAL (val) & 255;
23988 v |= v << 8;
23989 v |= v << 16;
23990 if (mode == DImode)
23991 v |= (v << 16) << 16;
23992 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
23995 if (valmode == VOIDmode)
23996 valmode = QImode;
23997 if (valmode != QImode)
23998 val = gen_lowpart (QImode, val);
23999 if (mode == QImode)
24000 return val;
24001 if (!TARGET_PARTIAL_REG_STALL)
24002 nops--;
24003 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24004 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24005 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24006 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24008 rtx reg = convert_modes (mode, QImode, val, true);
24009 tmp = promote_duplicated_reg (mode, const1_rtx);
24010 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24011 OPTAB_DIRECT);
24013 else
24015 rtx reg = convert_modes (mode, QImode, val, true);
24017 if (!TARGET_PARTIAL_REG_STALL)
24018 if (mode == SImode)
24019 emit_insn (gen_movsi_insv_1 (reg, reg));
24020 else
24021 emit_insn (gen_movdi_insv_1 (reg, reg));
24022 else
24024 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24025 NULL, 1, OPTAB_DIRECT);
24026 reg =
24027 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24029 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24030 NULL, 1, OPTAB_DIRECT);
24031 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24032 if (mode == SImode)
24033 return reg;
24034 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24035 NULL, 1, OPTAB_DIRECT);
24036 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24037 return reg;
24041 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24042 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24043 alignment from ALIGN to DESIRED_ALIGN. */
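/* E.g. on a 64-bit target with SIZE_NEEDED == 8 the value is duplicated into
   a full DImode pattern, whereas SIZE_NEEDED == 2 with no extra alignment
   requirement only promotes it to HImode.  */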
24044 static rtx
24045 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24046 int align)
24048 rtx promoted_val;
24050 if (TARGET_64BIT
24051 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24052 promoted_val = promote_duplicated_reg (DImode, val);
24053 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24054 promoted_val = promote_duplicated_reg (SImode, val);
24055 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24056 promoted_val = promote_duplicated_reg (HImode, val);
24057 else
24058 promoted_val = val;
24060 return promoted_val;
24063 /* Expand a string move (memcpy) or store (memset) operation.  Use i386 string
24064 operations when profitable. The code depends upon architecture, block size
24065 and alignment, but always has one of the following overall structures:
24067 Aligned move sequence:
24069 1) Prologue guard: Conditional that jumps up to epilogues for small
24070 blocks that can be handled by epilogue alone. This is faster
24071 but also needed for correctness, since the prologue assumes the block
24072 is larger than the desired alignment.
24074 Optional dynamic check for size and libcall for large
24075 blocks is emitted here too, with -minline-stringops-dynamically.
24077 2) Prologue: copy first few bytes in order to get destination
24078 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24079 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24080 copied. We emit either a jump tree on power of two sized
24081 blocks, or a byte loop.
24083 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24084 with specified algorithm.
24086 4) Epilogue: code copying tail of the block that is too small to be
24087 handled by main body (or up to size guarded by prologue guard).
24089 Misaligned move sequence
24091 1) misaligned move prologue/epilogue containing:
24092 a) Prologue handling small memory blocks and jumping to done_label
24093 (skipped if blocks are known to be large enough)
24094 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24095 needed by single possibly misaligned move
24096 (skipped if alignment is not needed)
24097 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24099 2) Zero size guard dispatching to done_label, if needed
24101 3) Dispatch to a library call, if needed,
24103 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24104 with specified algorithm. */
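/* E.g. with the unrolled_loop algorithm on a 64-bit target the main body
   moves four DImode words (32 bytes) per iteration, so the step 1 guard
   sends blocks shorter than roughly 32 bytes directly to the step 4
   epilogue.  */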
24105 bool
24106 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24107 rtx align_exp, rtx expected_align_exp,
24108 rtx expected_size_exp, rtx min_size_exp,
24109 rtx max_size_exp, rtx probable_max_size_exp,
24110 bool issetmem)
24112 rtx destreg;
24113 rtx srcreg = NULL;
24114 rtx label = NULL;
24115 rtx tmp;
24116 rtx jump_around_label = NULL;
24117 HOST_WIDE_INT align = 1;
24118 unsigned HOST_WIDE_INT count = 0;
24119 HOST_WIDE_INT expected_size = -1;
24120 int size_needed = 0, epilogue_size_needed;
24121 int desired_align = 0, align_bytes = 0;
24122 enum stringop_alg alg;
24123 rtx promoted_val = NULL;
24124 rtx vec_promoted_val = NULL;
24125 bool force_loopy_epilogue = false;
24126 int dynamic_check;
24127 bool need_zero_guard = false;
24128 bool noalign;
24129 enum machine_mode move_mode = VOIDmode;
24130 int unroll_factor = 1;
24131 /* TODO: Once value ranges are available, fill in proper data. */
24132 unsigned HOST_WIDE_INT min_size = 0;
24133 unsigned HOST_WIDE_INT max_size = -1;
24134 unsigned HOST_WIDE_INT probable_max_size = -1;
24135 bool misaligned_prologue_used = false;
24137 if (CONST_INT_P (align_exp))
24138 align = INTVAL (align_exp);
24139 /* i386 can do misaligned access at a reasonable increase in cost.  */
24140 if (CONST_INT_P (expected_align_exp)
24141 && INTVAL (expected_align_exp) > align)
24142 align = INTVAL (expected_align_exp);
24143 /* ALIGN is the minimum of destination and source alignment, but we care here
24144 just about destination alignment. */
24145 else if (!issetmem
24146 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24147 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24149 if (CONST_INT_P (count_exp))
24150 min_size = max_size = probable_max_size = count = expected_size
24151 = INTVAL (count_exp);
24152 else
24154 if (min_size_exp)
24155 min_size = INTVAL (min_size_exp);
24156 if (max_size_exp)
24157 max_size = INTVAL (max_size_exp);
24158 if (probable_max_size_exp)
24159 probable_max_size = INTVAL (probable_max_size_exp);
24160 if (CONST_INT_P (expected_size_exp) && count == 0)
24161 expected_size = INTVAL (expected_size_exp);
24164 /* Make sure we don't need to care about overflow later on. */
24165 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24166 return false;
24168 /* Step 0: Decide on preferred algorithm, desired alignment and
24169 size of chunks to be copied by main loop. */
24170 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24171 issetmem,
24172 issetmem && val_exp == const0_rtx,
24173 &dynamic_check, &noalign);
24174 if (alg == libcall)
24175 return false;
24176 gcc_assert (alg != no_stringop);
24178 /* For now the vector version of memset is generated only for memory zeroing, as
24179 creating the promoted vector value is very cheap in this case.  */
24180 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24181 alg = unrolled_loop;
24183 if (!count)
24184 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24185 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24186 if (!issetmem)
24187 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24189 unroll_factor = 1;
24190 move_mode = word_mode;
24191 switch (alg)
24193 case libcall:
24194 case no_stringop:
24195 case last_alg:
24196 gcc_unreachable ();
24197 case loop_1_byte:
24198 need_zero_guard = true;
24199 move_mode = QImode;
24200 break;
24201 case loop:
24202 need_zero_guard = true;
24203 break;
24204 case unrolled_loop:
24205 need_zero_guard = true;
24206 unroll_factor = (TARGET_64BIT ? 4 : 2);
24207 break;
24208 case vector_loop:
24209 need_zero_guard = true;
24210 unroll_factor = 4;
24211 /* Find the widest supported mode. */
24212 move_mode = word_mode;
24213 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24214 != CODE_FOR_nothing)
24215 move_mode = GET_MODE_WIDER_MODE (move_mode);
24217 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24218 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24219 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24221 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24222 move_mode = mode_for_vector (word_mode, nunits);
24223 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24224 move_mode = word_mode;
24226 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24227 break;
24228 case rep_prefix_8_byte:
24229 move_mode = DImode;
24230 break;
24231 case rep_prefix_4_byte:
24232 move_mode = SImode;
24233 break;
24234 case rep_prefix_1_byte:
24235 move_mode = QImode;
24236 break;
24238 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24239 epilogue_size_needed = size_needed;
24241 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24242 if (!TARGET_ALIGN_STRINGOPS || noalign)
24243 align = desired_align;
24245 /* Step 1: Prologue guard. */
24247 /* Alignment code needs count to be in register. */
24248 if (CONST_INT_P (count_exp) && desired_align > align)
24250 if (INTVAL (count_exp) > desired_align
24251 && INTVAL (count_exp) > size_needed)
24253 align_bytes
24254 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24255 if (align_bytes <= 0)
24256 align_bytes = 0;
24257 else
24258 align_bytes = desired_align - align_bytes;
24260 if (align_bytes == 0)
24261 count_exp = force_reg (counter_mode (count_exp), count_exp);
24263 gcc_assert (desired_align >= 1 && align >= 1);
24265 /* Misaligned move sequences handle both prologue and epilogue at once.
24266 Default code generation results in smaller code for large alignments
24267 and also avoids redundant work when sizes are known precisely.  */
24268 misaligned_prologue_used
24269 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24270 && MAX (desired_align, epilogue_size_needed) <= 32
24271 && desired_align <= epilogue_size_needed
24272 && ((desired_align > align && !align_bytes)
24273 || (!count && epilogue_size_needed > 1)));
24275 /* Do the cheap promotion to allow better CSE across the
24276 main loop and epilogue (i.e. one load of the big constant in
24277 front of all the code).
24278 For now the misaligned move sequences do not have a fast path
24279 without broadcasting.  */
24280 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24282 if (alg == vector_loop)
24284 gcc_assert (val_exp == const0_rtx);
24285 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24286 promoted_val = promote_duplicated_reg_to_size (val_exp,
24287 GET_MODE_SIZE (word_mode),
24288 desired_align, align);
24290 else
24292 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24293 desired_align, align);
24296 /* Misaligned move sequences handle both prologues and epilogues at once.
24297 Default code generation results in smaller code for large alignments and
24298 also avoids redundant work when sizes are known precisely.  */
24299 if (misaligned_prologue_used)
24301 /* Misaligned move prologue handled small blocks by itself. */
24302 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24303 (dst, src, &destreg, &srcreg,
24304 move_mode, promoted_val, vec_promoted_val,
24305 &count_exp,
24306 &jump_around_label,
24307 desired_align < align
24308 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24309 desired_align, align, &min_size, dynamic_check, issetmem);
24310 if (!issetmem)
24311 src = change_address (src, BLKmode, srcreg);
24312 dst = change_address (dst, BLKmode, destreg);
24313 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24314 epilogue_size_needed = 0;
24315 if (need_zero_guard && !min_size)
24317 /* It is possible that we copied enough so the main loop will not
24318 execute. */
24319 gcc_assert (size_needed > 1);
24320 if (jump_around_label == NULL_RTX)
24321 jump_around_label = gen_label_rtx ();
24322 emit_cmp_and_jump_insns (count_exp,
24323 GEN_INT (size_needed),
24324 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24325 if (expected_size == -1
24326 || expected_size < (desired_align - align) / 2 + size_needed)
24327 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24328 else
24329 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24332 /* Ensure that alignment prologue won't copy past end of block. */
24333 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24335 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24336 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24337 Make sure it is power of 2. */
24338 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24340 /* To improve performance of small blocks, we jump around the VAL
24341 promoting code.  This means that if the promoted VAL is not constant,
24342 we might not use it in the epilogue and have to use the byte
24343 loop variant. */
24344 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24345 force_loopy_epilogue = true;
24346 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24347 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24349 /* If main algorithm works on QImode, no epilogue is needed.
24350 For small sizes just don't align anything. */
24351 if (size_needed == 1)
24352 desired_align = align;
24353 else
24354 goto epilogue;
24356 else if (!count
24357 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24359 label = gen_label_rtx ();
24360 emit_cmp_and_jump_insns (count_exp,
24361 GEN_INT (epilogue_size_needed),
24362 LTU, 0, counter_mode (count_exp), 1, label);
24363 if (expected_size == -1 || expected_size < epilogue_size_needed)
24364 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24365 else
24366 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24370 /* Emit code to decide on runtime whether library call or inline should be
24371 used. */
24372 if (dynamic_check != -1)
24374 if (!issetmem && CONST_INT_P (count_exp))
24376 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24378 emit_block_move_via_libcall (dst, src, count_exp, false);
24379 count_exp = const0_rtx;
24380 goto epilogue;
24383 else
24385 rtx hot_label = gen_label_rtx ();
24386 if (jump_around_label == NULL_RTX)
24387 jump_around_label = gen_label_rtx ();
24388 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24389 LEU, 0, GET_MODE (count_exp), 1, hot_label);
24390 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24391 if (issetmem)
24392 set_storage_via_libcall (dst, count_exp, val_exp, false);
24393 else
24394 emit_block_move_via_libcall (dst, src, count_exp, false);
24395 emit_jump (jump_around_label);
24396 emit_label (hot_label);
24400 /* Step 2: Alignment prologue. */
24401 /* Do the expensive promotion once we branched off the small blocks. */
24402 if (issetmem && !promoted_val)
24403 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24404 desired_align, align);
24406 if (desired_align > align && !misaligned_prologue_used)
24408 if (align_bytes == 0)
24410 /* Except for the first move in the prologue, we no longer know
24411 the constant offset in aliasing info.  It doesn't seem worth
24412 the pain to maintain it for the first move, so throw away
24413 the info early.  */
24414 dst = change_address (dst, BLKmode, destreg);
24415 if (!issetmem)
24416 src = change_address (src, BLKmode, srcreg);
24417 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24418 promoted_val, vec_promoted_val,
24419 count_exp, align, desired_align,
24420 issetmem);
24421 /* At most desired_align - align bytes are copied. */
24422 if (min_size < (unsigned)(desired_align - align))
24423 min_size = 0;
24424 else
24425 min_size -= desired_align - align;
24427 else
24429 /* If we know how many bytes need to be stored before dst is
24430 sufficiently aligned, maintain aliasing info accurately. */
24431 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24432 srcreg,
24433 promoted_val,
24434 vec_promoted_val,
24435 desired_align,
24436 align_bytes,
24437 issetmem);
24439 count_exp = plus_constant (counter_mode (count_exp),
24440 count_exp, -align_bytes);
24441 count -= align_bytes;
24442 min_size -= align_bytes;
24443 max_size -= align_bytes;
24445 if (need_zero_guard
24446 && !min_size
24447 && (count < (unsigned HOST_WIDE_INT) size_needed
24448 || (align_bytes == 0
24449 && count < ((unsigned HOST_WIDE_INT) size_needed
24450 + desired_align - align))))
24452 /* It is possible that we copied enough so the main loop will not
24453 execute. */
24454 gcc_assert (size_needed > 1);
24455 if (label == NULL_RTX)
24456 label = gen_label_rtx ();
24457 emit_cmp_and_jump_insns (count_exp,
24458 GEN_INT (size_needed),
24459 LTU, 0, counter_mode (count_exp), 1, label);
24460 if (expected_size == -1
24461 || expected_size < (desired_align - align) / 2 + size_needed)
24462 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24463 else
24464 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24467 if (label && size_needed == 1)
24469 emit_label (label);
24470 LABEL_NUSES (label) = 1;
24471 label = NULL;
24472 epilogue_size_needed = 1;
24473 if (issetmem)
24474 promoted_val = val_exp;
24476 else if (label == NULL_RTX && !misaligned_prologue_used)
24477 epilogue_size_needed = size_needed;
24479 /* Step 3: Main loop. */
24481 switch (alg)
24483 case libcall:
24484 case no_stringop:
24485 case last_alg:
24486 gcc_unreachable ();
24487 case loop_1_byte:
24488 case loop:
24489 case unrolled_loop:
24490 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24491 count_exp, move_mode, unroll_factor,
24492 expected_size, issetmem);
24493 break;
24494 case vector_loop:
24495 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24496 vec_promoted_val, count_exp, move_mode,
24497 unroll_factor, expected_size, issetmem);
24498 break;
24499 case rep_prefix_8_byte:
24500 case rep_prefix_4_byte:
24501 case rep_prefix_1_byte:
24502 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24503 val_exp, count_exp, move_mode, issetmem);
24504 break;
24506 /* Adjust the offsets of src and dest memory properly for aliasing. */
24507 if (CONST_INT_P (count_exp))
24509 if (!issetmem)
24510 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24511 (count / size_needed) * size_needed);
24512 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24513 (count / size_needed) * size_needed);
24515 else
24517 if (!issetmem)
24518 src = change_address (src, BLKmode, srcreg);
24519 dst = change_address (dst, BLKmode, destreg);
24522 /* Step 4: Epilogue to copy the remaining bytes. */
24523 epilogue:
24524 if (label)
24526 /* When the main loop is done, COUNT_EXP might hold original count,
24527 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24528 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24529 bytes. Compensate if needed. */
24531 if (size_needed < epilogue_size_needed)
24533 tmp =
24534 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24535 GEN_INT (size_needed - 1), count_exp, 1,
24536 OPTAB_DIRECT);
24537 if (tmp != count_exp)
24538 emit_move_insn (count_exp, tmp);
24540 emit_label (label);
24541 LABEL_NUSES (label) = 1;
24544 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24546 if (force_loopy_epilogue)
24547 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24548 epilogue_size_needed);
24549 else
24551 if (issetmem)
24552 expand_setmem_epilogue (dst, destreg, promoted_val,
24553 vec_promoted_val, count_exp,
24554 epilogue_size_needed);
24555 else
24556 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24557 epilogue_size_needed);
24560 if (jump_around_label)
24561 emit_label (jump_around_label);
24562 return true;
24566 /* Expand the appropriate insns for doing strlen if not just doing
24567 repnz; scasb
24569 out = result, initialized with the start address
24570 align_rtx = alignment of the address.
24571 scratch = scratch register, initialized with the start address when
24572 not aligned, otherwise undefined
24574 This is just the body. It needs the initializations mentioned above and
24575 some address computing at the end. These things are done in i386.md. */
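/* In outline (an informal sketch of what the emitted RTL does, not literal
   code):
       while OUT is not 4-byte aligned:          /* at most 3 byte checks */
         if (*OUT == 0) goto end_0;  else OUT++;
       do {                                      /* main loop, one word per pass */
         word = *(unsigned int *) OUT;  OUT += 4;
       } while (((word - 0x01010101) & ~word & 0x80808080) == 0);
       back OUT up to the zero byte found inside WORD;
     end_0:
   so OUT ends up pointing at the terminating zero byte.  */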
24577 static void
24578 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24580 int align;
24581 rtx tmp;
24582 rtx align_2_label = NULL_RTX;
24583 rtx align_3_label = NULL_RTX;
24584 rtx align_4_label = gen_label_rtx ();
24585 rtx end_0_label = gen_label_rtx ();
24586 rtx mem;
24587 rtx tmpreg = gen_reg_rtx (SImode);
24588 rtx scratch = gen_reg_rtx (SImode);
24589 rtx cmp;
24591 align = 0;
24592 if (CONST_INT_P (align_rtx))
24593 align = INTVAL (align_rtx);
24595 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24597 /* Is there a known alignment and is it less than 4? */
24598 if (align < 4)
24600 rtx scratch1 = gen_reg_rtx (Pmode);
24601 emit_move_insn (scratch1, out);
24602 /* Is there a known alignment and is it not 2? */
24603 if (align != 2)
24605 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24606 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24608 /* Leave just the 3 lower bits. */
24609 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24610 NULL_RTX, 0, OPTAB_WIDEN);
24612 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24613 Pmode, 1, align_4_label);
24614 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24615 Pmode, 1, align_2_label);
24616 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24617 Pmode, 1, align_3_label);
24619 else
24621 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24622 check whether it is aligned to a 4-byte boundary. */
24624 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24625 NULL_RTX, 0, OPTAB_WIDEN);
24627 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24628 Pmode, 1, align_4_label);
24631 mem = change_address (src, QImode, out);
24633 /* Now compare the bytes. */
24635 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24636 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24637 QImode, 1, end_0_label);
24639 /* Increment the address. */
24640 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24642 /* Not needed with an alignment of 2 */
24643 if (align != 2)
24645 emit_label (align_2_label);
24647 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24648 end_0_label);
24650 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24652 emit_label (align_3_label);
24655 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24656 end_0_label);
24658 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24661 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24662 align this loop; doing so only makes the program larger and does not
24663 speed it up. */
24664 emit_label (align_4_label);
24666 mem = change_address (src, SImode, out);
24667 emit_move_insn (scratch, mem);
24668 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24670 /* This formula yields a nonzero result iff one of the bytes is zero.
24671 This saves three branches inside the loop and many cycles. */
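/* This is the classic "does X contain a zero byte" bit trick:
       (X - 0x01010101) & ~X & 0x80808080
   Worked example (illustration only): for X = 0x11003344 the terms are
   0x0FFF3243 & 0xEEFFCCBB & 0x80808080 = 0x00800000, flagging the zero byte;
   any X without a zero byte yields 0.  */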
24673 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24674 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24675 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24676 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24677 gen_int_mode (0x80808080, SImode)));
24678 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24679 align_4_label);
24681 if (TARGET_CMOVE)
24683 rtx reg = gen_reg_rtx (SImode);
24684 rtx reg2 = gen_reg_rtx (Pmode);
24685 emit_move_insn (reg, tmpreg);
24686 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24688 /* If zero is not in the first two bytes, move two bytes forward. */
24689 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24690 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24691 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24692 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24693 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24694 reg,
24695 tmpreg)));
24696 /* Emit lea manually to avoid clobbering of flags. */
24697 emit_insn (gen_rtx_SET (SImode, reg2,
24698 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24700 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24701 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24702 emit_insn (gen_rtx_SET (VOIDmode, out,
24703 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24704 reg2,
24705 out)));
24707 else
24709 rtx end_2_label = gen_label_rtx ();
24710 /* Is zero in the first two bytes? */
24712 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24713 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24714 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24715 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24716 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24717 pc_rtx);
24718 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24719 JUMP_LABEL (tmp) = end_2_label;
24721 /* Not in the first two. Move two bytes forward. */
24722 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24723 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24725 emit_label (end_2_label);
24729 /* Avoid branch in fixing the byte. */
24730 tmpreg = gen_lowpart (QImode, tmpreg);
24731 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24732 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24733 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24734 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
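/* How the branch-free fixup above works (informal note): at this point OUT is
   four bytes past the two-byte group known to contain the zero byte, and bit 7
   of the low byte of TMPREG is set exactly when the zero byte is the first
   byte of that group.  Adding TMPREG to itself copies that bit into the carry
   flag, so the subtract-with-borrow computes OUT - 3 - CF, which lands on the
   terminating zero byte.  */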
24736 emit_label (end_0_label);
24739 /* Expand strlen. */
24741 bool
24742 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24744 rtx addr, scratch1, scratch2, scratch3, scratch4;
24746 /* The generic case of the strlen expander is long. Avoid expanding it
24747 unless TARGET_INLINE_ALL_STRINGOPS. */
24749 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24750 && !TARGET_INLINE_ALL_STRINGOPS
24751 && !optimize_insn_for_size_p ()
24752 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24753 return false;
24755 addr = force_reg (Pmode, XEXP (src, 0));
24756 scratch1 = gen_reg_rtx (Pmode);
24758 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24759 && !optimize_insn_for_size_p ())
24761 /* Well, it seems that some optimizer does not combine a call like
24762 foo(strlen(bar), strlen(bar));
24763 when the move and the subtraction are done here. It does calculate
24764 the length just once when these instructions are done inside
24765 output_strlen_unroll(). But since &bar[strlen(bar)] is often used,
24766 and this uses one fewer register for the lifetime of
24767 output_strlen_unroll(), I think it is better this way. */
24769 emit_move_insn (out, addr);
24771 ix86_expand_strlensi_unroll_1 (out, src, align);
24773 /* strlensi_unroll_1 returns the address of the zero at the end of
24774 the string, like memchr(), so compute the length by subtracting
24775 the start address. */
24776 emit_insn (ix86_gen_sub3 (out, out, addr));
24778 else
24780 rtx unspec;
24782 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24783 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24784 return false;
24786 scratch2 = gen_reg_rtx (Pmode);
24787 scratch3 = gen_reg_rtx (Pmode);
24788 scratch4 = force_reg (Pmode, constm1_rtx);
24790 emit_move_insn (scratch3, addr);
24791 eoschar = force_reg (QImode, eoschar);
24793 src = replace_equiv_address_nv (src, scratch3);
24795 /* If .md starts supporting :P, this can be done in .md. */
24796 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24797 scratch4), UNSPEC_SCAS);
24798 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24799 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24800 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24802 return true;
24805 /* For a given symbol (function), construct code to compute the address of
24806 its PLT entry in the large x86-64 PIC model. */
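/* Roughly (register names purely illustrative), the emitted sequence is
       movabs $function@PLTOFF, %tmp
       add    <PIC register>, %tmp
   i.e. the 64-bit PLT offset of the symbol is materialized as a constant and
   then rebased on the PIC register.  */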
24807 static rtx
24808 construct_plt_address (rtx symbol)
24810 rtx tmp, unspec;
24812 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24813 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24814 gcc_assert (Pmode == DImode);
24816 tmp = gen_reg_rtx (Pmode);
24817 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24819 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24820 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24821 return tmp;
24825 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24826 rtx callarg2,
24827 rtx pop, bool sibcall)
24829 unsigned int const cregs_size
24830 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24831 rtx vec[3 + cregs_size];
24832 rtx use = NULL, call;
24833 unsigned int vec_len = 0;
24835 if (pop == const0_rtx)
24836 pop = NULL;
24837 gcc_assert (!TARGET_64BIT || !pop);
24839 if (TARGET_MACHO && !TARGET_64BIT)
24841 #if TARGET_MACHO
24842 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24843 fnaddr = machopic_indirect_call_target (fnaddr);
24844 #endif
24846 else
24848 /* Static functions and indirect calls don't need the pic register. */
24849 if (flag_pic
24850 && (!TARGET_64BIT
24851 || (ix86_cmodel == CM_LARGE_PIC
24852 && DEFAULT_ABI != MS_ABI))
24853 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24854 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24855 use_reg (&use, pic_offset_table_rtx);
24858 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24860 rtx al = gen_rtx_REG (QImode, AX_REG);
24861 emit_move_insn (al, callarg2);
24862 use_reg (&use, al);
24865 if (ix86_cmodel == CM_LARGE_PIC
24866 && !TARGET_PECOFF
24867 && MEM_P (fnaddr)
24868 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24869 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24870 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24871 else if (sibcall
24872 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24873 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24875 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24876 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24879 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24880 if (retval)
24881 call = gen_rtx_SET (VOIDmode, retval, call);
24882 vec[vec_len++] = call;
24884 if (pop)
24886 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24887 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24888 vec[vec_len++] = pop;
24891 if (TARGET_64BIT_MS_ABI
24892 && (!callarg2 || INTVAL (callarg2) != -2))
24894 unsigned i;
24896 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24897 UNSPEC_MS_TO_SYSV_CALL);
24899 for (i = 0; i < cregs_size; i++)
24901 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24902 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24904 vec[vec_len++]
24905 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24909 if (vec_len > 1)
24910 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24911 call = emit_call_insn (call);
24912 if (use)
24913 CALL_INSN_FUNCTION_USAGE (call) = use;
24915 return call;
24918 /* Output the assembly for a call instruction. */
24920 const char *
24921 ix86_output_call_insn (rtx insn, rtx call_op)
24923 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24924 bool seh_nop_p = false;
24925 const char *xasm;
24927 if (SIBLING_CALL_P (insn))
24929 if (direct_p)
24930 xasm = "jmp\t%P0";
24931 /* SEH epilogue detection requires the indirect branch case
24932 to include REX.W. */
24933 else if (TARGET_SEH)
24934 xasm = "rex.W jmp %A0";
24935 else
24936 xasm = "jmp\t%A0";
24938 output_asm_insn (xasm, &call_op);
24939 return "";
24942 /* SEH unwinding can require an extra nop to be emitted in several
24943 circumstances. Determine if we have one of those. */
24944 if (TARGET_SEH)
24946 rtx i;
24948 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24950 /* If we get to another real insn, we don't need the nop. */
24951 if (INSN_P (i))
24952 break;
24954 /* If we get to the epilogue note, prevent a catch region from
24955 being adjacent to the standard epilogue sequence. If non-call
24956 exceptions are enabled, we'll have done this during epilogue emission. */
24957 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
24958 && !flag_non_call_exceptions
24959 && !can_throw_internal (insn))
24961 seh_nop_p = true;
24962 break;
24966 /* If we didn't find a real insn following the call, prevent the
24967 unwinder from looking into the next function. */
24968 if (i == NULL)
24969 seh_nop_p = true;
24972 if (direct_p)
24973 xasm = "call\t%P0";
24974 else
24975 xasm = "call\t%A0";
24977 output_asm_insn (xasm, &call_op);
24979 if (seh_nop_p)
24980 return "nop";
24982 return "";
24985 /* Clear stack slot assignments remembered from previous functions.
24986 This is called from INIT_EXPANDERS once before RTL is emitted for each
24987 function. */
24989 static struct machine_function *
24990 ix86_init_machine_status (void)
24992 struct machine_function *f;
24994 f = ggc_alloc_cleared_machine_function ();
24995 f->use_fast_prologue_epilogue_nregs = -1;
24996 f->call_abi = ix86_abi;
24998 return f;
25001 /* Return a MEM corresponding to a stack slot with mode MODE.
25002 Allocate a new slot if necessary.
25004 The RTL for a function can have several slots available: N is
25005 which slot to use. */
25008 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25010 struct stack_local_entry *s;
25012 gcc_assert (n < MAX_386_STACK_LOCALS);
25014 for (s = ix86_stack_locals; s; s = s->next)
25015 if (s->mode == mode && s->n == n)
25016 return validize_mem (copy_rtx (s->rtl));
25018 s = ggc_alloc_stack_local_entry ();
25019 s->n = n;
25020 s->mode = mode;
25021 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25023 s->next = ix86_stack_locals;
25024 ix86_stack_locals = s;
25025 return validize_mem (s->rtl);
25028 static void
25029 ix86_instantiate_decls (void)
25031 struct stack_local_entry *s;
25033 for (s = ix86_stack_locals; s; s = s->next)
25034 if (s->rtl != NULL_RTX)
25035 instantiate_decl_rtl (s->rtl);
25038 /* Check whether x86 address PARTS is a pc-relative address. */
25040 static bool
25041 rip_relative_addr_p (struct ix86_address *parts)
25043 rtx base, index, disp;
25045 base = parts->base;
25046 index = parts->index;
25047 disp = parts->disp;
25049 if (disp && !base && !index)
25051 if (TARGET_64BIT)
25053 rtx symbol = disp;
25055 if (GET_CODE (disp) == CONST)
25056 symbol = XEXP (disp, 0);
25057 if (GET_CODE (symbol) == PLUS
25058 && CONST_INT_P (XEXP (symbol, 1)))
25059 symbol = XEXP (symbol, 0);
25061 if (GET_CODE (symbol) == LABEL_REF
25062 || (GET_CODE (symbol) == SYMBOL_REF
25063 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25064 || (GET_CODE (symbol) == UNSPEC
25065 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25066 || XINT (symbol, 1) == UNSPEC_PCREL
25067 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25068 return true;
25071 return false;
25074 /* Calculate the length of the memory address in the instruction encoding.
25075 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25076 or other prefixes. We never generate addr32 prefix for LEA insn. */
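/* A couple of illustrative data points (derived from the logic below, not an
   exhaustive table): an operand like 8(%rbx) counts as 1 byte (a disp8),
   (%rsp) also counts as 1 byte (its mandatory SIB byte), and a non-default
   segment adds one byte for the override prefix.  */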
25079 memory_address_length (rtx addr, bool lea)
25081 struct ix86_address parts;
25082 rtx base, index, disp;
25083 int len;
25084 int ok;
25086 if (GET_CODE (addr) == PRE_DEC
25087 || GET_CODE (addr) == POST_INC
25088 || GET_CODE (addr) == PRE_MODIFY
25089 || GET_CODE (addr) == POST_MODIFY)
25090 return 0;
25092 ok = ix86_decompose_address (addr, &parts);
25093 gcc_assert (ok);
25095 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25097 /* If this is not LEA instruction, add the length of addr32 prefix. */
25098 if (TARGET_64BIT && !lea
25099 && (SImode_address_operand (addr, VOIDmode)
25100 || (parts.base && GET_MODE (parts.base) == SImode)
25101 || (parts.index && GET_MODE (parts.index) == SImode)))
25102 len++;
25104 base = parts.base;
25105 index = parts.index;
25106 disp = parts.disp;
25108 if (base && GET_CODE (base) == SUBREG)
25109 base = SUBREG_REG (base);
25110 if (index && GET_CODE (index) == SUBREG)
25111 index = SUBREG_REG (index);
25113 gcc_assert (base == NULL_RTX || REG_P (base));
25114 gcc_assert (index == NULL_RTX || REG_P (index));
25116 /* Rule of thumb:
25117 - esp as the base always wants an index,
25118 - ebp as the base always wants a displacement,
25119 - r12 as the base always wants an index,
25120 - r13 as the base always wants a displacement. */
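/* Concretely (illustration): "movl (%esp), %eax" must be encoded with a SIB
   byte as 8B 04 24, and "movl (%ebp), %eax" needs a zero disp8, 8B 45 00;
   one extra byte each compared with 8B 00 for "movl (%eax), %eax".  */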
25122 /* Register Indirect. */
25123 if (base && !index && !disp)
25125 /* esp (for its index) and ebp (for its displacement) need
25126 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25127 code. */
25128 if (base == arg_pointer_rtx
25129 || base == frame_pointer_rtx
25130 || REGNO (base) == SP_REG
25131 || REGNO (base) == BP_REG
25132 || REGNO (base) == R12_REG
25133 || REGNO (base) == R13_REG)
25134 len++;
25137 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25138 is not disp32, but disp32(%rip), so for disp32
25139 SIB byte is needed, unless print_operand_address
25140 optimizes it into disp32(%rip) or (%rip) is implied
25141 by UNSPEC. */
25142 else if (disp && !base && !index)
25144 len += 4;
25145 if (rip_relative_addr_p (&parts))
25146 len++;
25148 else
25150 /* Find the length of the displacement constant. */
25151 if (disp)
25153 if (base && satisfies_constraint_K (disp))
25154 len += 1;
25155 else
25156 len += 4;
25158 /* ebp always wants a displacement. Similarly r13. */
25159 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25160 len++;
25162 /* An index requires the two-byte modrm form.... */
25163 if (index
25164 /* ...like esp (or r12), which always wants an index. */
25165 || base == arg_pointer_rtx
25166 || base == frame_pointer_rtx
25167 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25168 len++;
25171 return len;
25174 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25175 is set, expect that insn have 8bit immediate alternative. */
25177 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25179 int len = 0;
25180 int i;
25181 extract_insn_cached (insn);
25182 for (i = recog_data.n_operands - 1; i >= 0; --i)
25183 if (CONSTANT_P (recog_data.operand[i]))
25185 enum attr_mode mode = get_attr_mode (insn);
25187 gcc_assert (!len);
25188 if (shortform && CONST_INT_P (recog_data.operand[i]))
25190 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25191 switch (mode)
25193 case MODE_QI:
25194 len = 1;
25195 continue;
25196 case MODE_HI:
25197 ival = trunc_int_for_mode (ival, HImode);
25198 break;
25199 case MODE_SI:
25200 ival = trunc_int_for_mode (ival, SImode);
25201 break;
25202 default:
25203 break;
25205 if (IN_RANGE (ival, -128, 127))
25207 len = 1;
25208 continue;
25211 switch (mode)
25213 case MODE_QI:
25214 len = 1;
25215 break;
25216 case MODE_HI:
25217 len = 2;
25218 break;
25219 case MODE_SI:
25220 len = 4;
25221 break;
25222 /* Immediates for DImode instructions are encoded
25223 as 32bit sign extended values. */
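/* (Informal note: e.g. "addq $imm, %rax" still carries only 4 immediate
   bytes; a constant that does not fit a sign-extended imm32 cannot be used
   as an immediate operand of such instructions at all.)  */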
25224 case MODE_DI:
25225 len = 4;
25226 break;
25227 default:
25228 fatal_insn ("unknown insn mode", insn);
25231 return len;
25234 /* Compute default value for "length_address" attribute. */
25236 ix86_attr_length_address_default (rtx insn)
25238 int i;
25240 if (get_attr_type (insn) == TYPE_LEA)
25242 rtx set = PATTERN (insn), addr;
25244 if (GET_CODE (set) == PARALLEL)
25245 set = XVECEXP (set, 0, 0);
25247 gcc_assert (GET_CODE (set) == SET);
25249 addr = SET_SRC (set);
25251 return memory_address_length (addr, true);
25254 extract_insn_cached (insn);
25255 for (i = recog_data.n_operands - 1; i >= 0; --i)
25256 if (MEM_P (recog_data.operand[i]))
25258 constrain_operands_cached (reload_completed);
25259 if (which_alternative != -1)
25261 const char *constraints = recog_data.constraints[i];
25262 int alt = which_alternative;
25264 while (*constraints == '=' || *constraints == '+')
25265 constraints++;
25266 while (alt-- > 0)
25267 while (*constraints++ != ',')
25269 /* Skip ignored operands. */
25270 if (*constraints == 'X')
25271 continue;
25273 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25275 return 0;
25278 /* Compute default value for "length_vex" attribute. It includes
25279 2 or 3 byte VEX prefix and 1 opcode byte. */
25282 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25284 int i;
25286 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX.W bit
25287 requires the 3-byte VEX prefix. */
25288 if (!has_0f_opcode || has_vex_w)
25289 return 3 + 1;
25291 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25292 if (!TARGET_64BIT)
25293 return 2 + 1;
25295 extract_insn_cached (insn);
25297 for (i = recog_data.n_operands - 1; i >= 0; --i)
25298 if (REG_P (recog_data.operand[i]))
25300 /* REX.W bit uses 3 byte VEX prefix. */
25301 if (GET_MODE (recog_data.operand[i]) == DImode
25302 && GENERAL_REG_P (recog_data.operand[i]))
25303 return 3 + 1;
25305 else
25307 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25308 if (MEM_P (recog_data.operand[i])
25309 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25310 return 3 + 1;
25313 return 2 + 1;
25316 /* Return the maximum number of instructions a cpu can issue. */
25318 static int
25319 ix86_issue_rate (void)
25321 switch (ix86_tune)
25323 case PROCESSOR_PENTIUM:
25324 case PROCESSOR_BONNELL:
25325 case PROCESSOR_SILVERMONT:
25326 case PROCESSOR_INTEL:
25327 case PROCESSOR_K6:
25328 case PROCESSOR_BTVER2:
25329 case PROCESSOR_PENTIUM4:
25330 case PROCESSOR_NOCONA:
25331 return 2;
25333 case PROCESSOR_PENTIUMPRO:
25334 case PROCESSOR_ATHLON:
25335 case PROCESSOR_K8:
25336 case PROCESSOR_AMDFAM10:
25337 case PROCESSOR_GENERIC:
25338 case PROCESSOR_BTVER1:
25339 return 3;
25341 case PROCESSOR_BDVER1:
25342 case PROCESSOR_BDVER2:
25343 case PROCESSOR_BDVER3:
25344 case PROCESSOR_BDVER4:
25345 case PROCESSOR_CORE2:
25346 case PROCESSOR_NEHALEM:
25347 case PROCESSOR_SANDYBRIDGE:
25348 case PROCESSOR_HASWELL:
25349 return 4;
25351 default:
25352 return 1;
25356 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25357 by DEP_INSN and nothing else set by DEP_INSN. */
25359 static bool
25360 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25362 rtx set, set2;
25364 /* Simplify the test for uninteresting insns. */
25365 if (insn_type != TYPE_SETCC
25366 && insn_type != TYPE_ICMOV
25367 && insn_type != TYPE_FCMOV
25368 && insn_type != TYPE_IBR)
25369 return false;
25371 if ((set = single_set (dep_insn)) != 0)
25373 set = SET_DEST (set);
25374 set2 = NULL_RTX;
25376 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25377 && XVECLEN (PATTERN (dep_insn), 0) == 2
25378 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25379 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25381 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25382 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25384 else
25385 return false;
25387 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25388 return false;
25390 /* This test is true if the dependent insn reads the flags but
25391 not any other potentially set register. */
25392 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25393 return false;
25395 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25396 return false;
25398 return true;
25401 /* Return true iff USE_INSN has a memory address with operands set by
25402 SET_INSN. */
25404 bool
25405 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25407 int i;
25408 extract_insn_cached (use_insn);
25409 for (i = recog_data.n_operands - 1; i >= 0; --i)
25410 if (MEM_P (recog_data.operand[i]))
25412 rtx addr = XEXP (recog_data.operand[i], 0);
25413 return modified_in_p (addr, set_insn) != 0;
25415 return false;
25418 /* Helper function for exact_store_load_dependency.
25419 Return true if addr is found in insn. */
25420 static bool
25421 exact_dependency_1 (rtx addr, rtx insn)
25423 enum rtx_code code;
25424 const char *format_ptr;
25425 int i, j;
25427 code = GET_CODE (insn);
25428 switch (code)
25430 case MEM:
25431 if (rtx_equal_p (addr, insn))
25432 return true;
25433 break;
25434 case REG:
25435 CASE_CONST_ANY:
25436 case SYMBOL_REF:
25437 case CODE_LABEL:
25438 case PC:
25439 case CC0:
25440 case EXPR_LIST:
25441 return false;
25442 default:
25443 break;
25446 format_ptr = GET_RTX_FORMAT (code);
25447 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25449 switch (*format_ptr++)
25451 case 'e':
25452 if (exact_dependency_1 (addr, XEXP (insn, i)))
25453 return true;
25454 break;
25455 case 'E':
25456 for (j = 0; j < XVECLEN (insn, i); j++)
25457 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25458 return true;
25459 break;
25462 return false;
25465 /* Return true if there exists exact dependency for store & load, i.e.
25466 the same memory address is used in them. */
25467 static bool
25468 exact_store_load_dependency (rtx store, rtx load)
25470 rtx set1, set2;
25472 set1 = single_set (store);
25473 if (!set1)
25474 return false;
25475 if (!MEM_P (SET_DEST (set1)))
25476 return false;
25477 set2 = single_set (load);
25478 if (!set2)
25479 return false;
25480 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25481 return true;
25482 return false;
25485 static int
25486 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25488 enum attr_type insn_type, dep_insn_type;
25489 enum attr_memory memory;
25490 rtx set, set2;
25491 int dep_insn_code_number;
25493 /* Anti and output dependencies have zero cost on all CPUs. */
25494 if (REG_NOTE_KIND (link) != 0)
25495 return 0;
25497 dep_insn_code_number = recog_memoized (dep_insn);
25499 /* If we can't recognize the insns, we can't really do anything. */
25500 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25501 return cost;
25503 insn_type = get_attr_type (insn);
25504 dep_insn_type = get_attr_type (dep_insn);
25506 switch (ix86_tune)
25508 case PROCESSOR_PENTIUM:
25509 /* Address Generation Interlock adds a cycle of latency. */
25510 if (insn_type == TYPE_LEA)
25512 rtx addr = PATTERN (insn);
25514 if (GET_CODE (addr) == PARALLEL)
25515 addr = XVECEXP (addr, 0, 0);
25517 gcc_assert (GET_CODE (addr) == SET);
25519 addr = SET_SRC (addr);
25520 if (modified_in_p (addr, dep_insn))
25521 cost += 1;
25523 else if (ix86_agi_dependent (dep_insn, insn))
25524 cost += 1;
25526 /* ??? Compares pair with jump/setcc. */
25527 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25528 cost = 0;
25530 /* Floating point stores require value to be ready one cycle earlier. */
25531 if (insn_type == TYPE_FMOV
25532 && get_attr_memory (insn) == MEMORY_STORE
25533 && !ix86_agi_dependent (dep_insn, insn))
25534 cost += 1;
25535 break;
25537 case PROCESSOR_PENTIUMPRO:
25538 /* INT->FP conversion is expensive. */
25539 if (get_attr_fp_int_src (dep_insn))
25540 cost += 5;
25542 /* There is one cycle extra latency between an FP op and a store. */
25543 if (insn_type == TYPE_FMOV
25544 && (set = single_set (dep_insn)) != NULL_RTX
25545 && (set2 = single_set (insn)) != NULL_RTX
25546 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25547 && MEM_P (SET_DEST (set2)))
25548 cost += 1;
25550 memory = get_attr_memory (insn);
25552 /* Show the ability of the reorder buffer to hide the latency of a load by
25553 executing it in parallel with the previous instruction when the previous
25554 instruction is not needed to compute the address. */
25555 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25556 && !ix86_agi_dependent (dep_insn, insn))
25558 /* Claim moves to take one cycle, as the core can issue one load
25559 at a time and the next load can start a cycle later. */
25560 if (dep_insn_type == TYPE_IMOV
25561 || dep_insn_type == TYPE_FMOV)
25562 cost = 1;
25563 else if (cost > 1)
25564 cost--;
25566 break;
25568 case PROCESSOR_K6:
25569 /* The esp dependency is resolved before
25570 the instruction is really finished. */
25571 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25572 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25573 return 1;
25575 /* INT->FP conversion is expensive. */
25576 if (get_attr_fp_int_src (dep_insn))
25577 cost += 5;
25579 memory = get_attr_memory (insn);
25581 /* Show the ability of the reorder buffer to hide the latency of a load by
25582 executing it in parallel with the previous instruction when the previous
25583 instruction is not needed to compute the address. */
25584 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25585 && !ix86_agi_dependent (dep_insn, insn))
25588 /* Claim moves to take one cycle, as the core can issue one load
25589 at a time and the next load can start a cycle later. */
25589 if (dep_insn_type == TYPE_IMOV
25590 || dep_insn_type == TYPE_FMOV)
25591 cost = 1;
25592 else if (cost > 2)
25593 cost -= 2;
25594 else
25595 cost = 1;
25597 break;
25599 case PROCESSOR_AMDFAM10:
25600 case PROCESSOR_BDVER1:
25601 case PROCESSOR_BDVER2:
25602 case PROCESSOR_BDVER3:
25603 case PROCESSOR_BDVER4:
25604 case PROCESSOR_BTVER1:
25605 case PROCESSOR_BTVER2:
25606 case PROCESSOR_GENERIC:
25607 /* Stack engine allows to execute push&pop instructions in parall. */
25608 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25609 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25610 return 0;
25611 /* FALLTHRU */
25613 case PROCESSOR_ATHLON:
25614 case PROCESSOR_K8:
25615 memory = get_attr_memory (insn);
25617 /* Show the ability of the reorder buffer to hide the latency of a load by
25618 executing it in parallel with the previous instruction when the previous
25619 instruction is not needed to compute the address. */
25620 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25621 && !ix86_agi_dependent (dep_insn, insn))
25623 enum attr_unit unit = get_attr_unit (insn);
25624 int loadcost = 3;
25626 /* Because of the difference between the length of the integer and
25627 floating unit pipeline preparation stages, the memory operands
25628 for floating point are cheaper.
25630 ??? For Athlon the difference is most probably 2. */
25631 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25632 loadcost = 3;
25633 else
25634 loadcost = TARGET_ATHLON ? 2 : 0;
25636 if (cost >= loadcost)
25637 cost -= loadcost;
25638 else
25639 cost = 0;
25641 break;
25643 case PROCESSOR_CORE2:
25644 case PROCESSOR_NEHALEM:
25645 case PROCESSOR_SANDYBRIDGE:
25646 case PROCESSOR_HASWELL:
25647 /* Stack engine allows to execute push&pop instructions in parall. */
25648 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25649 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25650 return 0;
25652 memory = get_attr_memory (insn);
25654 /* Show the ability of the reorder buffer to hide the latency of a load by
25655 executing it in parallel with the previous instruction when the previous
25656 instruction is not needed to compute the address. */
25657 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25658 && !ix86_agi_dependent (dep_insn, insn))
25660 if (cost >= 4)
25661 cost -= 4;
25662 else
25663 cost = 0;
25665 break;
25667 case PROCESSOR_SILVERMONT:
25668 case PROCESSOR_INTEL:
25669 if (!reload_completed)
25670 return cost;
25672 /* Increase cost of integer loads. */
25673 memory = get_attr_memory (dep_insn);
25674 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25676 enum attr_unit unit = get_attr_unit (dep_insn);
25677 if (unit == UNIT_INTEGER && cost == 1)
25679 if (memory == MEMORY_LOAD)
25680 cost = 3;
25681 else
25683 /* Increase the cost of ld/st only for short integer types,
25684 because of a store-forwarding issue. */
25685 rtx set = single_set (dep_insn);
25686 if (set && (GET_MODE (SET_DEST (set)) == QImode
25687 || GET_MODE (SET_DEST (set)) == HImode))
25689 /* Increase the cost of the store/load insn if an exact
25690 dependence exists and it is a load insn. */
25691 enum attr_memory insn_memory = get_attr_memory (insn);
25692 if (insn_memory == MEMORY_LOAD
25693 && exact_store_load_dependency (dep_insn, insn))
25694 cost = 3;
25700 default:
25701 break;
25704 return cost;
25707 /* How many alternative schedules to try. This should be as wide as the
25708 scheduling freedom in the DFA, but no wider. Making this value too
25709 large results in extra work for the scheduler. */
25711 static int
25712 ia32_multipass_dfa_lookahead (void)
25714 switch (ix86_tune)
25716 case PROCESSOR_PENTIUM:
25717 return 2;
25719 case PROCESSOR_PENTIUMPRO:
25720 case PROCESSOR_K6:
25721 return 1;
25723 case PROCESSOR_BDVER1:
25724 case PROCESSOR_BDVER2:
25725 case PROCESSOR_BDVER3:
25726 case PROCESSOR_BDVER4:
25727 /* We use a lookahead value of 4 for BD both before and after reload
25728 scheduling. The plan is to use a value of 8 for -O3. */
25729 return 4;
25731 case PROCESSOR_CORE2:
25732 case PROCESSOR_NEHALEM:
25733 case PROCESSOR_SANDYBRIDGE:
25734 case PROCESSOR_HASWELL:
25735 case PROCESSOR_BONNELL:
25736 case PROCESSOR_SILVERMONT:
25737 case PROCESSOR_INTEL:
25738 /* Generally, we want haifa-sched:max_issue() to look ahead as far as the
25739 number of instructions that can be executed in a cycle, i.e., issue_rate.
25740 I wonder why tuning for many CPUs does not do this. */
25741 if (reload_completed)
25742 return ix86_issue_rate ();
25743 /* Don't use lookahead for pre-reload schedule to save compile time. */
25744 return 0;
25746 default:
25747 return 0;
25751 /* Return true if target platform supports macro-fusion. */
25753 static bool
25754 ix86_macro_fusion_p ()
25756 return TARGET_FUSE_CMP_AND_BRANCH;
25759 /* Check whether the current microarchitecture supports macro fusion
25760 for the insn pair "CONDGEN + CONDJMP". Refer to
25761 "Intel Architectures Optimization Reference Manual". */
25763 static bool
25764 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25766 rtx src, dest;
25767 rtx single_set = single_set (condgen);
25768 enum rtx_code ccode;
25769 rtx compare_set = NULL_RTX, test_if, cond;
25770 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25772 if (get_attr_type (condgen) != TYPE_TEST
25773 && get_attr_type (condgen) != TYPE_ICMP
25774 && get_attr_type (condgen) != TYPE_INCDEC
25775 && get_attr_type (condgen) != TYPE_ALU)
25776 return false;
25778 if (single_set == NULL_RTX
25779 && !TARGET_FUSE_ALU_AND_BRANCH)
25780 return false;
25782 if (single_set != NULL_RTX)
25783 compare_set = single_set;
25784 else
25786 int i;
25787 rtx pat = PATTERN (condgen);
25788 for (i = 0; i < XVECLEN (pat, 0); i++)
25789 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25791 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25792 if (GET_CODE (set_src) == COMPARE)
25793 compare_set = XVECEXP (pat, 0, i);
25794 else
25795 alu_set = XVECEXP (pat, 0, i);
25798 if (compare_set == NULL_RTX)
25799 return false;
25800 src = SET_SRC (compare_set);
25801 if (GET_CODE (src) != COMPARE)
25802 return false;
25804 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25805 supported. */
25806 if ((MEM_P (XEXP (src, 0))
25807 && CONST_INT_P (XEXP (src, 1)))
25808 || (MEM_P (XEXP (src, 1))
25809 && CONST_INT_P (XEXP (src, 0))))
25810 return false;
25812 /* No fusion for RIP-relative address. */
25813 if (MEM_P (XEXP (src, 0)))
25814 addr = XEXP (XEXP (src, 0), 0);
25815 else if (MEM_P (XEXP (src, 1)))
25816 addr = XEXP (XEXP (src, 1), 0);
25818 if (addr) {
25819 ix86_address parts;
25820 int ok = ix86_decompose_address (addr, &parts);
25821 gcc_assert (ok);
25823 if (rip_relative_addr_p (&parts))
25824 return false;
25827 test_if = SET_SRC (pc_set (condjmp));
25828 cond = XEXP (test_if, 0);
25829 ccode = GET_CODE (cond);
25830 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25831 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25832 && (ccode == GE
25833 || ccode == GT
25834 || ccode == LE
25835 || ccode == LT))
25836 return false;
25838 /* Return true for TYPE_TEST and TYPE_ICMP. */
25839 if (get_attr_type (condgen) == TYPE_TEST
25840 || get_attr_type (condgen) == TYPE_ICMP)
25841 return true;
25843 /* What follows handles macro-fusion for ALU + jmp. */
25844 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25845 return false;
25847 /* No fusion for alu op with memory destination operand. */
25848 dest = SET_DEST (alu_set);
25849 if (MEM_P (dest))
25850 return false;
25852 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25853 supported. */
25854 if (get_attr_type (condgen) == TYPE_INCDEC
25855 && (ccode == GEU
25856 || ccode == GTU
25857 || ccode == LEU
25858 || ccode == LTU))
25859 return false;
25861 return true;
25864 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
25865 execution. It is applied if
25866 (1) an IMUL instruction is at the top of the list;
25867 (2) the ready list contains the only producer of an independent IMUL
25868 instruction.
25869 Return the index of the IMUL producer if it was found and -1 otherwise. */
25870 static int
25871 do_reorder_for_imul (rtx *ready, int n_ready)
25873 rtx insn, set, insn1, insn2;
25874 sd_iterator_def sd_it;
25875 dep_t dep;
25876 int index = -1;
25877 int i;
25879 if (!TARGET_BONNELL)
25880 return index;
25882 /* Check that IMUL instruction is on the top of ready list. */
25883 insn = ready[n_ready - 1];
25884 set = single_set (insn);
25885 if (!set)
25886 return index;
25887 if (!(GET_CODE (SET_SRC (set)) == MULT
25888 && GET_MODE (SET_SRC (set)) == SImode))
25889 return index;
25891 /* Search for producer of independent IMUL instruction. */
25892 for (i = n_ready - 2; i >= 0; i--)
25894 insn = ready[i];
25895 if (!NONDEBUG_INSN_P (insn))
25896 continue;
25897 /* Skip IMUL instruction. */
25898 insn2 = PATTERN (insn);
25899 if (GET_CODE (insn2) == PARALLEL)
25900 insn2 = XVECEXP (insn2, 0, 0);
25901 if (GET_CODE (insn2) == SET
25902 && GET_CODE (SET_SRC (insn2)) == MULT
25903 && GET_MODE (SET_SRC (insn2)) == SImode)
25904 continue;
25906 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25908 rtx con;
25909 con = DEP_CON (dep);
25910 if (!NONDEBUG_INSN_P (con))
25911 continue;
25912 insn1 = PATTERN (con);
25913 if (GET_CODE (insn1) == PARALLEL)
25914 insn1 = XVECEXP (insn1, 0, 0);
25916 if (GET_CODE (insn1) == SET
25917 && GET_CODE (SET_SRC (insn1)) == MULT
25918 && GET_MODE (SET_SRC (insn1)) == SImode)
25920 sd_iterator_def sd_it1;
25921 dep_t dep1;
25922 /* Check if there is no other dependee for IMUL. */
25923 index = i;
25924 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25926 rtx pro;
25927 pro = DEP_PRO (dep1);
25928 if (!NONDEBUG_INSN_P (pro))
25929 continue;
25930 if (pro != insn)
25931 index = -1;
25933 if (index >= 0)
25934 break;
25937 if (index >= 0)
25938 break;
25940 return index;
25943 /* Try to find the best candidate on the top of ready list if two insns
25944 have the same priority - candidate is best if its dependees were
25945 scheduled earlier. Applied for Silvermont only.
25946 Return true if top 2 insns must be interchanged. */
25947 static bool
25948 swap_top_of_ready_list (rtx *ready, int n_ready)
25950 rtx top = ready[n_ready - 1];
25951 rtx next = ready[n_ready - 2];
25952 rtx set;
25953 sd_iterator_def sd_it;
25954 dep_t dep;
25955 int clock1 = -1;
25956 int clock2 = -1;
25957 #define INSN_TICK(INSN) (HID (INSN)->tick)
25959 if (!TARGET_SILVERMONT && !TARGET_INTEL)
25960 return false;
25962 if (!NONDEBUG_INSN_P (top))
25963 return false;
25964 if (!NONJUMP_INSN_P (top))
25965 return false;
25966 if (!NONDEBUG_INSN_P (next))
25967 return false;
25968 if (!NONJUMP_INSN_P (next))
25969 return false;
25970 set = single_set (top);
25971 if (!set)
25972 return false;
25973 set = single_set (next);
25974 if (!set)
25975 return false;
25977 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
25979 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
25980 return false;
25981 /* Determine the winner more precisely. */
25982 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
25984 rtx pro;
25985 pro = DEP_PRO (dep);
25986 if (!NONDEBUG_INSN_P (pro))
25987 continue;
25988 if (INSN_TICK (pro) > clock1)
25989 clock1 = INSN_TICK (pro);
25991 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
25993 rtx pro;
25994 pro = DEP_PRO (dep);
25995 if (!NONDEBUG_INSN_P (pro))
25996 continue;
25997 if (INSN_TICK (pro) > clock2)
25998 clock2 = INSN_TICK (pro);
26001 if (clock1 == clock2)
26003 /* Determine winner - load must win. */
26004 enum attr_memory memory1, memory2;
26005 memory1 = get_attr_memory (top);
26006 memory2 = get_attr_memory (next);
26007 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26008 return true;
26010 return (bool) (clock2 < clock1);
26012 return false;
26013 #undef INSN_TICK
26016 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26017 Return the issue rate. */
26018 static int
26019 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26020 int clock_var)
26022 int issue_rate = -1;
26023 int n_ready = *pn_ready;
26024 int i;
26025 rtx insn;
26026 int index = -1;
26028 /* Set up issue rate. */
26029 issue_rate = ix86_issue_rate ();
26031 /* Do reordering for BONNELL/SILVERMONT only. */
26032 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26033 return issue_rate;
26035 /* Nothing to do if ready list contains only 1 instruction. */
26036 if (n_ready <= 1)
26037 return issue_rate;
26039 /* Do reordering for the post-reload scheduler only. */
26040 if (!reload_completed)
26041 return issue_rate;
26043 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26045 if (sched_verbose > 1)
26046 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26047 INSN_UID (ready[index]));
26049 /* Put IMUL producer (ready[index]) at the top of ready list. */
26050 insn = ready[index];
26051 for (i = index; i < n_ready - 1; i++)
26052 ready[i] = ready[i + 1];
26053 ready[n_ready - 1] = insn;
26054 return issue_rate;
26056 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26058 if (sched_verbose > 1)
26059 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26060 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26061 /* Swap 2 top elements of ready list. */
26062 insn = ready[n_ready - 1];
26063 ready[n_ready - 1] = ready[n_ready - 2];
26064 ready[n_ready - 2] = insn;
26066 return issue_rate;
26069 static bool
26070 ix86_class_likely_spilled_p (reg_class_t);
26072 /* Return true if the lhs of insn is a HW function argument register; set
26073 is_spilled to true if it is a likely-spilled HW register. */
26074 static bool
26075 insn_is_function_arg (rtx insn, bool* is_spilled)
26077 rtx dst;
26079 if (!NONDEBUG_INSN_P (insn))
26080 return false;
26081 /* Call instructions are not movable; ignore them. */
26082 if (CALL_P (insn))
26083 return false;
26084 insn = PATTERN (insn);
26085 if (GET_CODE (insn) == PARALLEL)
26086 insn = XVECEXP (insn, 0, 0);
26087 if (GET_CODE (insn) != SET)
26088 return false;
26089 dst = SET_DEST (insn);
26090 if (REG_P (dst) && HARD_REGISTER_P (dst)
26091 && ix86_function_arg_regno_p (REGNO (dst)))
26093 /* Is it likely spilled HW register? */
26094 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26095 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26096 *is_spilled = true;
26097 return true;
26099 return false;
26102 /* Add output dependencies for a chain of adjacent function arguments, but
26103 only if there is a move to a likely-spilled HW register. Return the first
26104 argument if at least one dependence was added, or NULL otherwise. */
26105 static rtx
26106 add_parameter_dependencies (rtx call, rtx head)
26108 rtx insn;
26109 rtx last = call;
26110 rtx first_arg = NULL;
26111 bool is_spilled = false;
26113 head = PREV_INSN (head);
26115 /* Find the argument-passing instruction nearest to the call. */
26116 while (true)
26118 last = PREV_INSN (last);
26119 if (last == head)
26120 return NULL;
26121 if (!NONDEBUG_INSN_P (last))
26122 continue;
26123 if (insn_is_function_arg (last, &is_spilled))
26124 break;
26125 return NULL;
26128 first_arg = last;
26129 while (true)
26131 insn = PREV_INSN (last);
26132 if (!INSN_P (insn))
26133 break;
26134 if (insn == head)
26135 break;
26136 if (!NONDEBUG_INSN_P (insn))
26138 last = insn;
26139 continue;
26141 if (insn_is_function_arg (insn, &is_spilled))
26143 /* Add an output dependence between two function arguments if the chain
26144 of output arguments contains likely-spilled HW registers. */
26145 if (is_spilled)
26146 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26147 first_arg = last = insn;
26149 else
26150 break;
26152 if (!is_spilled)
26153 return NULL;
26154 return first_arg;
26157 /* Add output or anti dependency from insn to first_arg to restrict its code
26158 motion. */
26159 static void
26160 avoid_func_arg_motion (rtx first_arg, rtx insn)
26162 rtx set;
26163 rtx tmp;
26165 set = single_set (insn);
26166 if (!set)
26167 return;
26168 tmp = SET_DEST (set);
26169 if (REG_P (tmp))
26171 /* Add output dependency to the first function argument. */
26172 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26173 return;
26175 /* Add anti dependency. */
26176 add_dependence (first_arg, insn, REG_DEP_ANTI);
26179 /* Avoid cross-block motion of a function argument by adding a dependency
26180 from the first non-jump instruction in bb. */
26181 static void
26182 add_dependee_for_func_arg (rtx arg, basic_block bb)
26184 rtx insn = BB_END (bb);
26186 while (insn)
26188 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26190 rtx set = single_set (insn);
26191 if (set)
26193 avoid_func_arg_motion (arg, insn);
26194 return;
26197 if (insn == BB_HEAD (bb))
26198 return;
26199 insn = PREV_INSN (insn);
26203 /* Hook for pre-reload schedule - avoid motion of function arguments
26204 passed in likely spilled HW registers. */
26205 static void
26206 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26208 rtx insn;
26209 rtx first_arg = NULL;
26210 if (reload_completed)
26211 return;
26212 while (head != tail && DEBUG_INSN_P (head))
26213 head = NEXT_INSN (head);
26214 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26215 if (INSN_P (insn) && CALL_P (insn))
26217 first_arg = add_parameter_dependencies (insn, head);
26218 if (first_arg)
26220 /* Add a dependee for the first argument to predecessors, but only if the
26221 region contains more than one block. */
26222 basic_block bb = BLOCK_FOR_INSN (insn);
26223 int rgn = CONTAINING_RGN (bb->index);
26224 int nr_blks = RGN_NR_BLOCKS (rgn);
26225 /* Skip trivial regions and region head blocks that can have
26226 predecessors outside of region. */
26227 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26229 edge e;
26230 edge_iterator ei;
26231 /* Assume that region is SCC, i.e. all immediate predecessors
26232 of non-head block are in the same region. */
26233 FOR_EACH_EDGE (e, ei, bb->preds)
26235 /* Avoid creating loop-carried dependencies by using the
26236 topological ordering in the region. */
26237 if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26238 add_dependee_for_func_arg (first_arg, e->src);
26241 insn = first_arg;
26242 if (insn == head)
26243 break;
26246 else if (first_arg)
26247 avoid_func_arg_motion (first_arg, insn);
26250 /* Hook for the pre-reload schedule - set the priority of moves from likely
26251 spilled HW registers to the maximum, to schedule them as soon as possible.
26252 These are moves from function argument registers at the top of the function
26253 entry and moves from function return value registers after a call. */
26254 static int
26255 ix86_adjust_priority (rtx insn, int priority)
26257 rtx set;
26259 if (reload_completed)
26260 return priority;
26262 if (!NONDEBUG_INSN_P (insn))
26263 return priority;
26265 set = single_set (insn);
26266 if (set)
26268 rtx tmp = SET_SRC (set);
26269 if (REG_P (tmp)
26270 && HARD_REGISTER_P (tmp)
26271 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26272 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26273 return current_sched_info->sched_max_insns_priority;
26276 return priority;
26279 /* Model decoder of Core 2/i7.
26280 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26281 track the instruction fetch block boundaries and make sure that long
26282 (9+ bytes) instructions are assigned to D0. */
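/* (Background, informal: these cores pair one complex decoder, D0, with
   simpler secondary decoders that handle only short, simple instructions, so
   the hooks below model the 16-byte instruction fetch window and the
   per-cycle decode limit.)  */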
26284 /* Maximum length of an insn that can be handled by
26285 a secondary decoder unit. '8' for Core 2/i7. */
26286 static int core2i7_secondary_decoder_max_insn_size;
26288 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26289 '16' for Core 2/i7. */
26290 static int core2i7_ifetch_block_size;
26292 /* Maximum number of instructions decoder can handle per cycle.
26293 '6' for Core 2/i7. */
26294 static int core2i7_ifetch_block_max_insns;
26296 typedef struct ix86_first_cycle_multipass_data_ *
26297 ix86_first_cycle_multipass_data_t;
26298 typedef const struct ix86_first_cycle_multipass_data_ *
26299 const_ix86_first_cycle_multipass_data_t;
26301 /* A variable to store target state across calls to max_issue within
26302 one cycle. */
26303 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26304 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26306 /* Initialize DATA. */
26307 static void
26308 core2i7_first_cycle_multipass_init (void *_data)
26310 ix86_first_cycle_multipass_data_t data
26311 = (ix86_first_cycle_multipass_data_t) _data;
26313 data->ifetch_block_len = 0;
26314 data->ifetch_block_n_insns = 0;
26315 data->ready_try_change = NULL;
26316 data->ready_try_change_size = 0;
26319 /* Advancing the cycle; reset ifetch block counts. */
26320 static void
26321 core2i7_dfa_post_advance_cycle (void)
26323 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26325 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26327 data->ifetch_block_len = 0;
26328 data->ifetch_block_n_insns = 0;
26331 static int min_insn_size (rtx);
26333 /* Filter out insns from ready_try that the core will not be able to issue
26334 on current cycle due to decoder. */
26335 static void
26336 core2i7_first_cycle_multipass_filter_ready_try
26337 (const_ix86_first_cycle_multipass_data_t data,
26338 char *ready_try, int n_ready, bool first_cycle_insn_p)
26340 while (n_ready--)
26342 rtx insn;
26343 int insn_size;
26345 if (ready_try[n_ready])
26346 continue;
26348 insn = get_ready_element (n_ready);
26349 insn_size = min_insn_size (insn);
26351 if (/* If this is too long an insn for a secondary decoder ... */
26352 (!first_cycle_insn_p
26353 && insn_size > core2i7_secondary_decoder_max_insn_size)
26354 /* ... or it would not fit into the ifetch block ... */
26355 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26356 /* ... or the decoder is full already ... */
26357 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26358 /* ... mask the insn out. */
26360 ready_try[n_ready] = 1;
26362 if (data->ready_try_change)
26363 bitmap_set_bit (data->ready_try_change, n_ready);
26368 /* Prepare for a new round of multipass lookahead scheduling. */
26369 static void
26370 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
26371 bool first_cycle_insn_p)
26373 ix86_first_cycle_multipass_data_t data
26374 = (ix86_first_cycle_multipass_data_t) _data;
26375 const_ix86_first_cycle_multipass_data_t prev_data
26376 = ix86_first_cycle_multipass_data;
26378 /* Restore the state from the end of the previous round. */
26379 data->ifetch_block_len = prev_data->ifetch_block_len;
26380 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26382 /* Filter instructions that cannot be issued on current cycle due to
26383 decoder restrictions. */
26384 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26385 first_cycle_insn_p);
26388 /* INSN is being issued in current solution. Account for its impact on
26389 the decoder model. */
26390 static void
26391 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
26392 rtx insn, const void *_prev_data)
26394 ix86_first_cycle_multipass_data_t data
26395 = (ix86_first_cycle_multipass_data_t) _data;
26396 const_ix86_first_cycle_multipass_data_t prev_data
26397 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26399 int insn_size = min_insn_size (insn);
26401 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26402 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26403 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26404 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26406 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26407 if (!data->ready_try_change)
26409 data->ready_try_change = sbitmap_alloc (n_ready);
26410 data->ready_try_change_size = n_ready;
26412 else if (data->ready_try_change_size < n_ready)
26414 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26415 n_ready, 0);
26416 data->ready_try_change_size = n_ready;
26418 bitmap_clear (data->ready_try_change);
26420 /* Filter out insns from ready_try that the core will not be able to issue
26421 on current cycle due to decoder. */
26422 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26423 false);
26426 /* Revert the effect on ready_try. */
26427 static void
26428 core2i7_first_cycle_multipass_backtrack (const void *_data,
26429 char *ready_try,
26430 int n_ready ATTRIBUTE_UNUSED)
26432 const_ix86_first_cycle_multipass_data_t data
26433 = (const_ix86_first_cycle_multipass_data_t) _data;
26434 unsigned int i = 0;
26435 sbitmap_iterator sbi;
26437 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26438 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26440 ready_try[i] = 0;
26444 /* Save the result of multipass lookahead scheduling for the next round. */
26445 static void
26446 core2i7_first_cycle_multipass_end (const void *_data)
26448 const_ix86_first_cycle_multipass_data_t data
26449 = (const_ix86_first_cycle_multipass_data_t) _data;
26450 ix86_first_cycle_multipass_data_t next_data
26451 = ix86_first_cycle_multipass_data;
26453 if (data != NULL)
26455 next_data->ifetch_block_len = data->ifetch_block_len;
26456 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26460 /* Deallocate target data. */
26461 static void
26462 core2i7_first_cycle_multipass_fini (void *_data)
26464 ix86_first_cycle_multipass_data_t data
26465 = (ix86_first_cycle_multipass_data_t) _data;
26467 if (data->ready_try_change)
26469 sbitmap_free (data->ready_try_change);
26470 data->ready_try_change = NULL;
26471 data->ready_try_change_size = 0;
26475 /* Prepare for scheduling pass. */
26476 static void
26477 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26478 int verbose ATTRIBUTE_UNUSED,
26479 int max_uid ATTRIBUTE_UNUSED)
26481 /* Install scheduling hooks for the current CPU. Some of these hooks are
26482 used in time-critical parts of the scheduler, so we only install them
26483 for the CPUs that actually use them. */
26484 switch (ix86_tune)
26486 case PROCESSOR_CORE2:
26487 case PROCESSOR_NEHALEM:
26488 case PROCESSOR_SANDYBRIDGE:
26489 case PROCESSOR_HASWELL:
26490 /* Do not perform multipass scheduling for the pre-reload
26491 scheduling pass, to save compile time. */
26492 if (reload_completed)
26494 targetm.sched.dfa_post_advance_cycle
26495 = core2i7_dfa_post_advance_cycle;
26496 targetm.sched.first_cycle_multipass_init
26497 = core2i7_first_cycle_multipass_init;
26498 targetm.sched.first_cycle_multipass_begin
26499 = core2i7_first_cycle_multipass_begin;
26500 targetm.sched.first_cycle_multipass_issue
26501 = core2i7_first_cycle_multipass_issue;
26502 targetm.sched.first_cycle_multipass_backtrack
26503 = core2i7_first_cycle_multipass_backtrack;
26504 targetm.sched.first_cycle_multipass_end
26505 = core2i7_first_cycle_multipass_end;
26506 targetm.sched.first_cycle_multipass_fini
26507 = core2i7_first_cycle_multipass_fini;
26509 /* Set decoder parameters. */
26510 core2i7_secondary_decoder_max_insn_size = 8;
26511 core2i7_ifetch_block_size = 16;
26512 core2i7_ifetch_block_max_insns = 6;
26513 break;
26515 /* ... Fall through ... */
26516 default:
26517 targetm.sched.dfa_post_advance_cycle = NULL;
26518 targetm.sched.first_cycle_multipass_init = NULL;
26519 targetm.sched.first_cycle_multipass_begin = NULL;
26520 targetm.sched.first_cycle_multipass_issue = NULL;
26521 targetm.sched.first_cycle_multipass_backtrack = NULL;
26522 targetm.sched.first_cycle_multipass_end = NULL;
26523 targetm.sched.first_cycle_multipass_fini = NULL;
26524 break;
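/* Illustrative note (not part of the original source): with the parameters
   chosen above, the scheduler models a Core2/Nehalem-style front end as a
   16-byte instruction fetch block from which at most 6 instructions can be
   issued per cycle (see the asserts in core2i7_first_cycle_multipass_issue),
   and assumes the secondary decoders only accept instructions of at most
   8 bytes (core2i7_secondary_decoder_max_insn_size).  Instructions that
   would overflow either limit in the current cycle are removed from
   ready_try by core2i7_first_cycle_multipass_filter_ready_try.  */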
26529 /* Compute the alignment given to a constant that is being placed in memory.
26530 EXP is the constant and ALIGN is the alignment that the object would
26531 ordinarily have.
26532 The value of this function is used instead of that alignment to align
26533 the object. */
26536 ix86_constant_alignment (tree exp, int align)
26538 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26539 || TREE_CODE (exp) == INTEGER_CST)
26541 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26542 return 64;
26543 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26544 return 128;
26546 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26547 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26548 return BITS_PER_WORD;
26550 return align;
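/* Illustrative note (not part of the original source): on ia32 a plain
   'double' only requires 4-byte alignment, but the code above still places
   a DFmode constant such as

       static const double pi = 3.141592653589793;

   on a 64-bit (8-byte) boundary, and places 128-bit constants (per the
   ALIGN_MODE_128 test, typically XFmode and the SSE vector modes) on a
   16-byte boundary, so loads of the constant are not split needlessly.
   When not optimizing for size, string constants of length 31 or more are
   likewise given word alignment.  */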
26553 /* Compute the alignment for a static variable.
26554 TYPE is the data type, and ALIGN is the alignment that
26555 the object would ordinarily have. The value of this function is used
26556 instead of that alignment to align the object. */
26559 ix86_data_alignment (tree type, int align, bool opt)
26561 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26562 for symbols from other compilation units or symbols that don't need
26563 to bind locally. In order to preserve some ABI compatibility with
26564 those compilers, ensure we don't decrease alignment from what we
26565 used to assume. */
26567 int max_align_compat
26568 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26570 /* A data structure whose size is equal to or greater than the size of a
26571 cache line (64 bytes on the Pentium 4 and other recent Intel processors,
26572 including processors based on the Intel Core microarchitecture) should be
26573 aligned so that its base address is a multiple of the cache-line size. */
26575 int max_align
26576 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26578 if (max_align < BITS_PER_WORD)
26579 max_align = BITS_PER_WORD;
26581 if (opt
26582 && AGGREGATE_TYPE_P (type)
26583 && TYPE_SIZE (type)
26584 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26586 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align_compat
26587 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26588 && align < max_align_compat)
26589 align = max_align_compat;
26590 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
26591 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26592 && align < max_align)
26593 align = max_align;
26596 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
26597 to a 16-byte boundary. */
26598 if (TARGET_64BIT)
26600 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26601 && TYPE_SIZE (type)
26602 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26603 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
26604 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26605 return 128;
26608 if (!opt)
26609 return align;
26611 if (TREE_CODE (type) == ARRAY_TYPE)
26613 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26614 return 64;
26615 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26616 return 128;
26618 else if (TREE_CODE (type) == COMPLEX_TYPE)
26621 if (TYPE_MODE (type) == DCmode && align < 64)
26622 return 64;
26623 if ((TYPE_MODE (type) == XCmode
26624 || TYPE_MODE (type) == TCmode) && align < 128)
26625 return 128;
26627 else if ((TREE_CODE (type) == RECORD_TYPE
26628 || TREE_CODE (type) == UNION_TYPE
26629 || TREE_CODE (type) == QUAL_UNION_TYPE)
26630 && TYPE_FIELDS (type))
26632 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26633 return 64;
26634 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26635 return 128;
26637 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26638 || TREE_CODE (type) == INTEGER_TYPE)
26640 if (TYPE_MODE (type) == DFmode && align < 64)
26641 return 64;
26642 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26643 return 128;
26646 return align;
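/* Illustrative note (not part of the original source): assuming
   ix86_tune_cost->prefetch_block is 64 bytes (so max_align above works out
   to 512 bits) and MAX_OFILE_ALIGNMENT does not get in the way, a global
   aggregate such as

       static int table[32];   (128 bytes)

   is bumped to 64-byte (cache-line) alignment when the OPT argument is
   true, while any aggregate of 32 bytes or more keeps at least the 256-bit
   alignment that GCC 4.8 and earlier assumed (max_align_compat).  */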
26649 /* Compute the alignment for a local variable or a stack slot. EXP is
26650 the data type or decl itself, MODE is the widest mode available and
26651 ALIGN is the alignment that the object would ordinarily have. The
26652 value of this macro is used instead of that alignment to align the
26653 object. */
26655 unsigned int
26656 ix86_local_alignment (tree exp, enum machine_mode mode,
26657 unsigned int align)
26659 tree type, decl;
26661 if (exp && DECL_P (exp))
26663 type = TREE_TYPE (exp);
26664 decl = exp;
26666 else
26668 type = exp;
26669 decl = NULL;
26672 /* Don't do dynamic stack realignment for long long objects with
26673 -mpreferred-stack-boundary=2. */
26674 if (!TARGET_64BIT
26675 && align == 64
26676 && ix86_preferred_stack_boundary < 64
26677 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26678 && (!type || !TYPE_USER_ALIGN (type))
26679 && (!decl || !DECL_USER_ALIGN (decl)))
26680 align = 32;
26682 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
26683 register in MODE. Return the larger of the XFmode and DFmode
26684 alignments. */
26685 if (!type)
26687 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26688 align = GET_MODE_ALIGNMENT (DFmode);
26689 return align;
26692 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
26693 to a 16-byte boundary. The exact wording is:
26695 An array uses the same alignment as its elements, except that a local or
26696 global array variable of length at least 16 bytes or
26697 a C99 variable-length array variable always has alignment of at least 16 bytes.
26699 This was added to allow the use of aligned SSE instructions on arrays. The
26700 rule is meant for static storage (where the compiler cannot do the analysis
26701 by itself); we follow it for automatic variables only when convenient.
26702 We fully control everything in the function being compiled, and functions
26703 from other units cannot rely on the alignment.
26705 Exclude the va_list type: it is the common case of a local array where
26706 we cannot benefit from the alignment.
26708 TODO: Probably we should optimize for size only when the variable does not escape. */
26709 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26710 && TARGET_SSE)
26712 if (AGGREGATE_TYPE_P (type)
26713 && (va_list_type_node == NULL_TREE
26714 || (TYPE_MAIN_VARIANT (type)
26715 != TYPE_MAIN_VARIANT (va_list_type_node)))
26716 && TYPE_SIZE (type)
26717 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26718 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
26719 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26720 return 128;
26722 if (TREE_CODE (type) == ARRAY_TYPE)
26724 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26725 return 64;
26726 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26727 return 128;
26729 else if (TREE_CODE (type) == COMPLEX_TYPE)
26731 if (TYPE_MODE (type) == DCmode && align < 64)
26732 return 64;
26733 if ((TYPE_MODE (type) == XCmode
26734 || TYPE_MODE (type) == TCmode) && align < 128)
26735 return 128;
26737 else if ((TREE_CODE (type) == RECORD_TYPE
26738 || TREE_CODE (type) == UNION_TYPE
26739 || TREE_CODE (type) == QUAL_UNION_TYPE)
26740 && TYPE_FIELDS (type))
26742 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26743 return 64;
26744 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26745 return 128;
26747 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26748 || TREE_CODE (type) == INTEGER_TYPE)
26751 if (TYPE_MODE (type) == DFmode && align < 64)
26752 return 64;
26753 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26754 return 128;
26756 return align;
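/* Illustrative note (not part of the original source): in a 64-bit function
   optimized for speed with SSE enabled, a local array such as

       char buf[32];

   receives 128-bit (16-byte) stack alignment from the code above, which lets
   the compiler use aligned 16-byte SSE moves on it; a local __builtin_va_list
   object is explicitly excluded from this bump.  */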
26759 /* Compute the minimum required alignment for dynamic stack realignment
26760 purposes for a local variable, parameter or a stack slot. EXP is
26761 the data type or decl itself, MODE is its mode and ALIGN is the
26762 alignment that the object would ordinarily have. */
26764 unsigned int
26765 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26766 unsigned int align)
26768 tree type, decl;
26770 if (exp && DECL_P (exp))
26772 type = TREE_TYPE (exp);
26773 decl = exp;
26775 else
26777 type = exp;
26778 decl = NULL;
26781 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26782 return align;
26784 /* Don't do dynamic stack realignment for long long objects with
26785 -mpreferred-stack-boundary=2. */
26786 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26787 && (!type || !TYPE_USER_ALIGN (type))
26788 && (!decl || !DECL_USER_ALIGN (decl)))
26789 return 32;
26791 return align;
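/* Illustrative note (not part of the original source): with -m32
   -mpreferred-stack-boundary=2 (a 4-byte, i.e. 32-bit, boundary), a
   'long long' local would ordinarily request 64-bit alignment and force
   dynamic stack realignment; the code above reports a 32-bit minimum
   instead (unless the user asked for the alignment explicitly), so such
   locals alone do not trigger realignment.  */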
26794 /* Find a location for the static chain incoming to a nested function.
26795 This is a register, unless all free registers are used by arguments. */
26797 static rtx
26798 ix86_static_chain (const_tree fndecl, bool incoming_p)
26800 unsigned regno;
26802 if (!DECL_STATIC_CHAIN (fndecl))
26803 return NULL;
26805 if (TARGET_64BIT)
26807 /* We always use R10 in 64-bit mode. */
26808 regno = R10_REG;
26810 else
26812 tree fntype;
26813 unsigned int ccvt;
26815 /* By default in 32-bit mode we use ECX to pass the static chain. */
26816 regno = CX_REG;
26818 fntype = TREE_TYPE (fndecl);
26819 ccvt = ix86_get_callcvt (fntype);
26820 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26822 /* Fastcall functions use ecx/edx for arguments, which leaves
26823 us with EAX for the static chain.
26824 Thiscall functions use ecx for arguments, which also
26825 leaves us with EAX for the static chain. */
26826 regno = AX_REG;
26828 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26830 /* Thiscall functions use ecx for arguments, which leaves
26831 us with EAX and EDX for the static chain.
26832 For ABI compatibility we use EAX. */
26833 regno = AX_REG;
26835 else if (ix86_function_regparm (fntype, fndecl) == 3)
26837 /* For regparm 3, we have no free call-clobbered registers in
26838 which to store the static chain. In order to implement this,
26839 we have the trampoline push the static chain to the stack.
26840 However, we can't push a value below the return address when
26841 we call the nested function directly, so we have to use an
26842 alternate entry point. For this we use ESI, and have the
26843 alternate entry point push ESI, so that things appear the
26844 same once we're executing the nested function. */
26845 if (incoming_p)
26847 if (fndecl == current_function_decl)
26848 ix86_static_chain_on_stack = true;
26849 return gen_frame_mem (SImode,
26850 plus_constant (Pmode,
26851 arg_pointer_rtx, -8));
26853 regno = SI_REG;
26857 return gen_rtx_REG (Pmode, regno);
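/* Illustrative summary (not part of the original source) of where the
   static chain ends up, as selected above:

     64-bit code                   -> %r10
     32-bit, default conventions   -> %ecx
     32-bit, fastcall or thiscall  -> %eax
     32-bit, regparm(3)            -> on the stack: pushed by the trampoline,
                                      or passed in %esi to an alternate entry
                                      point that pushes %esi for direct calls  */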
26860 /* Emit RTL insns to initialize the variable parts of a trampoline.
26861 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26862 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26863 to be passed to the target function. */
26865 static void
26866 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26868 rtx mem, fnaddr;
26869 int opcode;
26870 int offset = 0;
26872 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26874 if (TARGET_64BIT)
26876 int size;
26878 /* Load the function address into r11. Try to load the address
26879 using the shorter movl instead of movabs. We may want to support
26880 movq for kernel mode, but the kernel does not use trampolines at
26881 the moment. FNADDR is a 32-bit address and may not be in
26882 DImode when ptr_mode == SImode. Always use movl in this
26883 case. */
26884 if (ptr_mode == SImode
26885 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26887 fnaddr = copy_addr_to_reg (fnaddr);
26889 mem = adjust_address (m_tramp, HImode, offset);
26890 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26892 mem = adjust_address (m_tramp, SImode, offset + 2);
26893 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26894 offset += 6;
26896 else
26898 mem = adjust_address (m_tramp, HImode, offset);
26899 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26901 mem = adjust_address (m_tramp, DImode, offset + 2);
26902 emit_move_insn (mem, fnaddr);
26903 offset += 10;
26906 /* Load the static chain into r10 using movabs. Use the shorter movl
26907 instead of movabs when ptr_mode == SImode. */
26908 if (ptr_mode == SImode)
26910 opcode = 0xba41;
26911 size = 6;
26913 else
26915 opcode = 0xba49;
26916 size = 10;
26919 mem = adjust_address (m_tramp, HImode, offset);
26920 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26922 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26923 emit_move_insn (mem, chain_value);
26924 offset += size;
26926 /* Jump to r11; the last (unused) byte is a nop, only there to
26927 pad the write out to a single 32-bit store. */
26928 mem = adjust_address (m_tramp, SImode, offset);
26929 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26930 offset += 4;
26932 else
26934 rtx disp, chain;
26936 /* Depending on the static chain location, either load a register
26937 with a constant, or push the constant to the stack. All of the
26938 instructions are the same size. */
26939 chain = ix86_static_chain (fndecl, true);
26940 if (REG_P (chain))
26942 switch (REGNO (chain))
26944 case AX_REG:
26945 opcode = 0xb8; break;
26946 case CX_REG:
26947 opcode = 0xb9; break;
26948 default:
26949 gcc_unreachable ();
26952 else
26953 opcode = 0x68;
26955 mem = adjust_address (m_tramp, QImode, offset);
26956 emit_move_insn (mem, gen_int_mode (opcode, QImode));
26958 mem = adjust_address (m_tramp, SImode, offset + 1);
26959 emit_move_insn (mem, chain_value);
26960 offset += 5;
26962 mem = adjust_address (m_tramp, QImode, offset);
26963 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
26965 mem = adjust_address (m_tramp, SImode, offset + 1);
26967 /* Compute the offset from the end of the jmp to the target function.
26968 When the trampoline stores the static chain on the stack, we need
26969 to skip the target's first insn, which pushes the (call-saved)
26970 static chain register; this push is 1 byte. */
26971 offset += 5;
26972 disp = expand_binop (SImode, sub_optab, fnaddr,
26973 plus_constant (Pmode, XEXP (m_tramp, 0),
26974 offset - (MEM_P (chain) ? 1 : 0)),
26975 NULL_RTX, 1, OPTAB_DIRECT);
26976 emit_move_insn (mem, disp);
26979 gcc_assert (offset <= TRAMPOLINE_SIZE);
26981 #ifdef HAVE_ENABLE_EXECUTE_STACK
26982 #ifdef CHECK_EXECUTE_STACK_ENABLED
26983 if (CHECK_EXECUTE_STACK_ENABLED)
26984 #endif
26985 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
26986 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
26987 #endif
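/* Illustrative note (not part of the original source): the opcode bytes
   emitted above decode to the following sequences; the immediates are
   filled in at run time.

   64-bit trampoline (the movl forms are used when ptr_mode == SImode, or,
   for FNADDR, when the address is a 32-bit zero-extended immediate):
       41 bb imm32   movl   $fnaddr, %r11d     or  49 bb imm64  movabs $fnaddr, %r11
       41 ba imm32   movl   $chain,  %r10d     or  49 ba imm64  movabs $chain,  %r10
       49 ff e3      jmp    *%r11
       90            nop   (pads the final write to a full 32-bit store)

   32-bit trampoline:
       b8 / b9 imm32  movl  $chain, %eax / %ecx   (register static chain)
    or 68 imm32       pushl $chain                (regparm(3), chain on stack)
       e9 rel32       jmp   fnaddr  */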
26990 /* The following file contains several enumerations and data structures
26991 built from the definitions in i386-builtin-types.def. */
26993 #include "i386-builtin-types.inc"
26995 /* Table for the ix86 builtin non-function types. */
26996 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
26998 /* Retrieve an element from the above table, building some of
26999 the types lazily. */
27001 static tree
27002 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27004 unsigned int index;
27005 tree type, itype;
27007 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27009 type = ix86_builtin_type_tab[(int) tcode];
27010 if (type != NULL)
27011 return type;
27013 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27014 if (tcode <= IX86_BT_LAST_VECT)
27016 enum machine_mode mode;
27018 index = tcode - IX86_BT_LAST_PRIM - 1;
27019 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27020 mode = ix86_builtin_type_vect_mode[index];
27022 type = build_vector_type_for_mode (itype, mode);
27024 else
27026 int quals;
27028 index = tcode - IX86_BT_LAST_VECT - 1;
27029 if (tcode <= IX86_BT_LAST_PTR)
27030 quals = TYPE_UNQUALIFIED;
27031 else
27032 quals = TYPE_QUAL_CONST;
27034 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27035 if (quals != TYPE_UNQUALIFIED)
27036 itype = build_qualified_type (itype, quals);
27038 type = build_pointer_type (itype);
27041 ix86_builtin_type_tab[(int) tcode] = type;
27042 return type;
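/* Illustrative walk-through (not part of the original source): vector type
   codes follow the primitive ones, so for a hypothetical vector code such
   as IX86_BT_V4SF the lookup above computes
   index = tcode - IX86_BT_LAST_PRIM - 1, fetches the element type (float)
   and machine mode (V4SFmode) from the two parallel tables generated from
   i386-builtin-types.def, and caches the result of
   build_vector_type_for_mode so later requests return the same tree.
   Pointer codes work the same way, adding a const qualifier for the codes
   past IX86_BT_LAST_PTR.  */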
27045 /* Table for the ix86 builtin function types. */
27046 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27048 /* Retrieve an element from the above table, building some of
27049 the types lazily. */
27051 static tree
27052 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27054 tree type;
27056 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27058 type = ix86_builtin_func_type_tab[(int) tcode];
27059 if (type != NULL)
27060 return type;
27062 if (tcode <= IX86_BT_LAST_FUNC)
27064 unsigned start = ix86_builtin_func_start[(int) tcode];
27065 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27066 tree rtype, atype, args = void_list_node;
27067 unsigned i;
27069 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27070 for (i = after - 1; i > start; --i)
27072 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27073 args = tree_cons (NULL, atype, args);
27076 type = build_function_type (rtype, args);
27078 else
27080 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27081 enum ix86_builtin_func_type icode;
27083 icode = ix86_builtin_func_alias_base[index];
27084 type = ix86_get_builtin_func_type (icode);
27087 ix86_builtin_func_type_tab[(int) tcode] = type;
27088 return type;
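/* Illustrative walk-through (not part of the original source): for a
   non-alias code the prototype is assembled from the flattened argument
   table: ix86_builtin_func_args[start] is the return type, and the entries
   up to (but not including) the next function's start are the parameters,
   consumed in reverse so the tree_cons chain ends with void_list_node.
   A hypothetical V4SF_FTYPE_V4SF_V4SF entry therefore yields a function
   type equivalent to 'v4sf (*) (v4sf, v4sf)'.  Alias codes simply share
   the tree of the function type they alias.  */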
27092 /* Codes for all the SSE/MMX builtins. */
27093 enum ix86_builtins
27095 IX86_BUILTIN_ADDPS,
27096 IX86_BUILTIN_ADDSS,
27097 IX86_BUILTIN_DIVPS,
27098 IX86_BUILTIN_DIVSS,
27099 IX86_BUILTIN_MULPS,
27100 IX86_BUILTIN_MULSS,
27101 IX86_BUILTIN_SUBPS,
27102 IX86_BUILTIN_SUBSS,
27104 IX86_BUILTIN_CMPEQPS,
27105 IX86_BUILTIN_CMPLTPS,
27106 IX86_BUILTIN_CMPLEPS,
27107 IX86_BUILTIN_CMPGTPS,
27108 IX86_BUILTIN_CMPGEPS,
27109 IX86_BUILTIN_CMPNEQPS,
27110 IX86_BUILTIN_CMPNLTPS,
27111 IX86_BUILTIN_CMPNLEPS,
27112 IX86_BUILTIN_CMPNGTPS,
27113 IX86_BUILTIN_CMPNGEPS,
27114 IX86_BUILTIN_CMPORDPS,
27115 IX86_BUILTIN_CMPUNORDPS,
27116 IX86_BUILTIN_CMPEQSS,
27117 IX86_BUILTIN_CMPLTSS,
27118 IX86_BUILTIN_CMPLESS,
27119 IX86_BUILTIN_CMPNEQSS,
27120 IX86_BUILTIN_CMPNLTSS,
27121 IX86_BUILTIN_CMPNLESS,
27122 IX86_BUILTIN_CMPORDSS,
27123 IX86_BUILTIN_CMPUNORDSS,
27125 IX86_BUILTIN_COMIEQSS,
27126 IX86_BUILTIN_COMILTSS,
27127 IX86_BUILTIN_COMILESS,
27128 IX86_BUILTIN_COMIGTSS,
27129 IX86_BUILTIN_COMIGESS,
27130 IX86_BUILTIN_COMINEQSS,
27131 IX86_BUILTIN_UCOMIEQSS,
27132 IX86_BUILTIN_UCOMILTSS,
27133 IX86_BUILTIN_UCOMILESS,
27134 IX86_BUILTIN_UCOMIGTSS,
27135 IX86_BUILTIN_UCOMIGESS,
27136 IX86_BUILTIN_UCOMINEQSS,
27138 IX86_BUILTIN_CVTPI2PS,
27139 IX86_BUILTIN_CVTPS2PI,
27140 IX86_BUILTIN_CVTSI2SS,
27141 IX86_BUILTIN_CVTSI642SS,
27142 IX86_BUILTIN_CVTSS2SI,
27143 IX86_BUILTIN_CVTSS2SI64,
27144 IX86_BUILTIN_CVTTPS2PI,
27145 IX86_BUILTIN_CVTTSS2SI,
27146 IX86_BUILTIN_CVTTSS2SI64,
27148 IX86_BUILTIN_MAXPS,
27149 IX86_BUILTIN_MAXSS,
27150 IX86_BUILTIN_MINPS,
27151 IX86_BUILTIN_MINSS,
27153 IX86_BUILTIN_LOADUPS,
27154 IX86_BUILTIN_STOREUPS,
27155 IX86_BUILTIN_MOVSS,
27157 IX86_BUILTIN_MOVHLPS,
27158 IX86_BUILTIN_MOVLHPS,
27159 IX86_BUILTIN_LOADHPS,
27160 IX86_BUILTIN_LOADLPS,
27161 IX86_BUILTIN_STOREHPS,
27162 IX86_BUILTIN_STORELPS,
27164 IX86_BUILTIN_MASKMOVQ,
27165 IX86_BUILTIN_MOVMSKPS,
27166 IX86_BUILTIN_PMOVMSKB,
27168 IX86_BUILTIN_MOVNTPS,
27169 IX86_BUILTIN_MOVNTQ,
27171 IX86_BUILTIN_LOADDQU,
27172 IX86_BUILTIN_STOREDQU,
27174 IX86_BUILTIN_PACKSSWB,
27175 IX86_BUILTIN_PACKSSDW,
27176 IX86_BUILTIN_PACKUSWB,
27178 IX86_BUILTIN_PADDB,
27179 IX86_BUILTIN_PADDW,
27180 IX86_BUILTIN_PADDD,
27181 IX86_BUILTIN_PADDQ,
27182 IX86_BUILTIN_PADDSB,
27183 IX86_BUILTIN_PADDSW,
27184 IX86_BUILTIN_PADDUSB,
27185 IX86_BUILTIN_PADDUSW,
27186 IX86_BUILTIN_PSUBB,
27187 IX86_BUILTIN_PSUBW,
27188 IX86_BUILTIN_PSUBD,
27189 IX86_BUILTIN_PSUBQ,
27190 IX86_BUILTIN_PSUBSB,
27191 IX86_BUILTIN_PSUBSW,
27192 IX86_BUILTIN_PSUBUSB,
27193 IX86_BUILTIN_PSUBUSW,
27195 IX86_BUILTIN_PAND,
27196 IX86_BUILTIN_PANDN,
27197 IX86_BUILTIN_POR,
27198 IX86_BUILTIN_PXOR,
27200 IX86_BUILTIN_PAVGB,
27201 IX86_BUILTIN_PAVGW,
27203 IX86_BUILTIN_PCMPEQB,
27204 IX86_BUILTIN_PCMPEQW,
27205 IX86_BUILTIN_PCMPEQD,
27206 IX86_BUILTIN_PCMPGTB,
27207 IX86_BUILTIN_PCMPGTW,
27208 IX86_BUILTIN_PCMPGTD,
27210 IX86_BUILTIN_PMADDWD,
27212 IX86_BUILTIN_PMAXSW,
27213 IX86_BUILTIN_PMAXUB,
27214 IX86_BUILTIN_PMINSW,
27215 IX86_BUILTIN_PMINUB,
27217 IX86_BUILTIN_PMULHUW,
27218 IX86_BUILTIN_PMULHW,
27219 IX86_BUILTIN_PMULLW,
27221 IX86_BUILTIN_PSADBW,
27222 IX86_BUILTIN_PSHUFW,
27224 IX86_BUILTIN_PSLLW,
27225 IX86_BUILTIN_PSLLD,
27226 IX86_BUILTIN_PSLLQ,
27227 IX86_BUILTIN_PSRAW,
27228 IX86_BUILTIN_PSRAD,
27229 IX86_BUILTIN_PSRLW,
27230 IX86_BUILTIN_PSRLD,
27231 IX86_BUILTIN_PSRLQ,
27232 IX86_BUILTIN_PSLLWI,
27233 IX86_BUILTIN_PSLLDI,
27234 IX86_BUILTIN_PSLLQI,
27235 IX86_BUILTIN_PSRAWI,
27236 IX86_BUILTIN_PSRADI,
27237 IX86_BUILTIN_PSRLWI,
27238 IX86_BUILTIN_PSRLDI,
27239 IX86_BUILTIN_PSRLQI,
27241 IX86_BUILTIN_PUNPCKHBW,
27242 IX86_BUILTIN_PUNPCKHWD,
27243 IX86_BUILTIN_PUNPCKHDQ,
27244 IX86_BUILTIN_PUNPCKLBW,
27245 IX86_BUILTIN_PUNPCKLWD,
27246 IX86_BUILTIN_PUNPCKLDQ,
27248 IX86_BUILTIN_SHUFPS,
27250 IX86_BUILTIN_RCPPS,
27251 IX86_BUILTIN_RCPSS,
27252 IX86_BUILTIN_RSQRTPS,
27253 IX86_BUILTIN_RSQRTPS_NR,
27254 IX86_BUILTIN_RSQRTSS,
27255 IX86_BUILTIN_RSQRTF,
27256 IX86_BUILTIN_SQRTPS,
27257 IX86_BUILTIN_SQRTPS_NR,
27258 IX86_BUILTIN_SQRTSS,
27260 IX86_BUILTIN_UNPCKHPS,
27261 IX86_BUILTIN_UNPCKLPS,
27263 IX86_BUILTIN_ANDPS,
27264 IX86_BUILTIN_ANDNPS,
27265 IX86_BUILTIN_ORPS,
27266 IX86_BUILTIN_XORPS,
27268 IX86_BUILTIN_EMMS,
27269 IX86_BUILTIN_LDMXCSR,
27270 IX86_BUILTIN_STMXCSR,
27271 IX86_BUILTIN_SFENCE,
27273 IX86_BUILTIN_FXSAVE,
27274 IX86_BUILTIN_FXRSTOR,
27275 IX86_BUILTIN_FXSAVE64,
27276 IX86_BUILTIN_FXRSTOR64,
27278 IX86_BUILTIN_XSAVE,
27279 IX86_BUILTIN_XRSTOR,
27280 IX86_BUILTIN_XSAVE64,
27281 IX86_BUILTIN_XRSTOR64,
27283 IX86_BUILTIN_XSAVEOPT,
27284 IX86_BUILTIN_XSAVEOPT64,
27286 /* 3DNow! Original */
27287 IX86_BUILTIN_FEMMS,
27288 IX86_BUILTIN_PAVGUSB,
27289 IX86_BUILTIN_PF2ID,
27290 IX86_BUILTIN_PFACC,
27291 IX86_BUILTIN_PFADD,
27292 IX86_BUILTIN_PFCMPEQ,
27293 IX86_BUILTIN_PFCMPGE,
27294 IX86_BUILTIN_PFCMPGT,
27295 IX86_BUILTIN_PFMAX,
27296 IX86_BUILTIN_PFMIN,
27297 IX86_BUILTIN_PFMUL,
27298 IX86_BUILTIN_PFRCP,
27299 IX86_BUILTIN_PFRCPIT1,
27300 IX86_BUILTIN_PFRCPIT2,
27301 IX86_BUILTIN_PFRSQIT1,
27302 IX86_BUILTIN_PFRSQRT,
27303 IX86_BUILTIN_PFSUB,
27304 IX86_BUILTIN_PFSUBR,
27305 IX86_BUILTIN_PI2FD,
27306 IX86_BUILTIN_PMULHRW,
27308 /* 3DNow! Athlon Extensions */
27309 IX86_BUILTIN_PF2IW,
27310 IX86_BUILTIN_PFNACC,
27311 IX86_BUILTIN_PFPNACC,
27312 IX86_BUILTIN_PI2FW,
27313 IX86_BUILTIN_PSWAPDSI,
27314 IX86_BUILTIN_PSWAPDSF,
27316 /* SSE2 */
27317 IX86_BUILTIN_ADDPD,
27318 IX86_BUILTIN_ADDSD,
27319 IX86_BUILTIN_DIVPD,
27320 IX86_BUILTIN_DIVSD,
27321 IX86_BUILTIN_MULPD,
27322 IX86_BUILTIN_MULSD,
27323 IX86_BUILTIN_SUBPD,
27324 IX86_BUILTIN_SUBSD,
27326 IX86_BUILTIN_CMPEQPD,
27327 IX86_BUILTIN_CMPLTPD,
27328 IX86_BUILTIN_CMPLEPD,
27329 IX86_BUILTIN_CMPGTPD,
27330 IX86_BUILTIN_CMPGEPD,
27331 IX86_BUILTIN_CMPNEQPD,
27332 IX86_BUILTIN_CMPNLTPD,
27333 IX86_BUILTIN_CMPNLEPD,
27334 IX86_BUILTIN_CMPNGTPD,
27335 IX86_BUILTIN_CMPNGEPD,
27336 IX86_BUILTIN_CMPORDPD,
27337 IX86_BUILTIN_CMPUNORDPD,
27338 IX86_BUILTIN_CMPEQSD,
27339 IX86_BUILTIN_CMPLTSD,
27340 IX86_BUILTIN_CMPLESD,
27341 IX86_BUILTIN_CMPNEQSD,
27342 IX86_BUILTIN_CMPNLTSD,
27343 IX86_BUILTIN_CMPNLESD,
27344 IX86_BUILTIN_CMPORDSD,
27345 IX86_BUILTIN_CMPUNORDSD,
27347 IX86_BUILTIN_COMIEQSD,
27348 IX86_BUILTIN_COMILTSD,
27349 IX86_BUILTIN_COMILESD,
27350 IX86_BUILTIN_COMIGTSD,
27351 IX86_BUILTIN_COMIGESD,
27352 IX86_BUILTIN_COMINEQSD,
27353 IX86_BUILTIN_UCOMIEQSD,
27354 IX86_BUILTIN_UCOMILTSD,
27355 IX86_BUILTIN_UCOMILESD,
27356 IX86_BUILTIN_UCOMIGTSD,
27357 IX86_BUILTIN_UCOMIGESD,
27358 IX86_BUILTIN_UCOMINEQSD,
27360 IX86_BUILTIN_MAXPD,
27361 IX86_BUILTIN_MAXSD,
27362 IX86_BUILTIN_MINPD,
27363 IX86_BUILTIN_MINSD,
27365 IX86_BUILTIN_ANDPD,
27366 IX86_BUILTIN_ANDNPD,
27367 IX86_BUILTIN_ORPD,
27368 IX86_BUILTIN_XORPD,
27370 IX86_BUILTIN_SQRTPD,
27371 IX86_BUILTIN_SQRTSD,
27373 IX86_BUILTIN_UNPCKHPD,
27374 IX86_BUILTIN_UNPCKLPD,
27376 IX86_BUILTIN_SHUFPD,
27378 IX86_BUILTIN_LOADUPD,
27379 IX86_BUILTIN_STOREUPD,
27380 IX86_BUILTIN_MOVSD,
27382 IX86_BUILTIN_LOADHPD,
27383 IX86_BUILTIN_LOADLPD,
27385 IX86_BUILTIN_CVTDQ2PD,
27386 IX86_BUILTIN_CVTDQ2PS,
27388 IX86_BUILTIN_CVTPD2DQ,
27389 IX86_BUILTIN_CVTPD2PI,
27390 IX86_BUILTIN_CVTPD2PS,
27391 IX86_BUILTIN_CVTTPD2DQ,
27392 IX86_BUILTIN_CVTTPD2PI,
27394 IX86_BUILTIN_CVTPI2PD,
27395 IX86_BUILTIN_CVTSI2SD,
27396 IX86_BUILTIN_CVTSI642SD,
27398 IX86_BUILTIN_CVTSD2SI,
27399 IX86_BUILTIN_CVTSD2SI64,
27400 IX86_BUILTIN_CVTSD2SS,
27401 IX86_BUILTIN_CVTSS2SD,
27402 IX86_BUILTIN_CVTTSD2SI,
27403 IX86_BUILTIN_CVTTSD2SI64,
27405 IX86_BUILTIN_CVTPS2DQ,
27406 IX86_BUILTIN_CVTPS2PD,
27407 IX86_BUILTIN_CVTTPS2DQ,
27409 IX86_BUILTIN_MOVNTI,
27410 IX86_BUILTIN_MOVNTI64,
27411 IX86_BUILTIN_MOVNTPD,
27412 IX86_BUILTIN_MOVNTDQ,
27414 IX86_BUILTIN_MOVQ128,
27416 /* SSE2 MMX */
27417 IX86_BUILTIN_MASKMOVDQU,
27418 IX86_BUILTIN_MOVMSKPD,
27419 IX86_BUILTIN_PMOVMSKB128,
27421 IX86_BUILTIN_PACKSSWB128,
27422 IX86_BUILTIN_PACKSSDW128,
27423 IX86_BUILTIN_PACKUSWB128,
27425 IX86_BUILTIN_PADDB128,
27426 IX86_BUILTIN_PADDW128,
27427 IX86_BUILTIN_PADDD128,
27428 IX86_BUILTIN_PADDQ128,
27429 IX86_BUILTIN_PADDSB128,
27430 IX86_BUILTIN_PADDSW128,
27431 IX86_BUILTIN_PADDUSB128,
27432 IX86_BUILTIN_PADDUSW128,
27433 IX86_BUILTIN_PSUBB128,
27434 IX86_BUILTIN_PSUBW128,
27435 IX86_BUILTIN_PSUBD128,
27436 IX86_BUILTIN_PSUBQ128,
27437 IX86_BUILTIN_PSUBSB128,
27438 IX86_BUILTIN_PSUBSW128,
27439 IX86_BUILTIN_PSUBUSB128,
27440 IX86_BUILTIN_PSUBUSW128,
27442 IX86_BUILTIN_PAND128,
27443 IX86_BUILTIN_PANDN128,
27444 IX86_BUILTIN_POR128,
27445 IX86_BUILTIN_PXOR128,
27447 IX86_BUILTIN_PAVGB128,
27448 IX86_BUILTIN_PAVGW128,
27450 IX86_BUILTIN_PCMPEQB128,
27451 IX86_BUILTIN_PCMPEQW128,
27452 IX86_BUILTIN_PCMPEQD128,
27453 IX86_BUILTIN_PCMPGTB128,
27454 IX86_BUILTIN_PCMPGTW128,
27455 IX86_BUILTIN_PCMPGTD128,
27457 IX86_BUILTIN_PMADDWD128,
27459 IX86_BUILTIN_PMAXSW128,
27460 IX86_BUILTIN_PMAXUB128,
27461 IX86_BUILTIN_PMINSW128,
27462 IX86_BUILTIN_PMINUB128,
27464 IX86_BUILTIN_PMULUDQ,
27465 IX86_BUILTIN_PMULUDQ128,
27466 IX86_BUILTIN_PMULHUW128,
27467 IX86_BUILTIN_PMULHW128,
27468 IX86_BUILTIN_PMULLW128,
27470 IX86_BUILTIN_PSADBW128,
27471 IX86_BUILTIN_PSHUFHW,
27472 IX86_BUILTIN_PSHUFLW,
27473 IX86_BUILTIN_PSHUFD,
27475 IX86_BUILTIN_PSLLDQI128,
27476 IX86_BUILTIN_PSLLWI128,
27477 IX86_BUILTIN_PSLLDI128,
27478 IX86_BUILTIN_PSLLQI128,
27479 IX86_BUILTIN_PSRAWI128,
27480 IX86_BUILTIN_PSRADI128,
27481 IX86_BUILTIN_PSRLDQI128,
27482 IX86_BUILTIN_PSRLWI128,
27483 IX86_BUILTIN_PSRLDI128,
27484 IX86_BUILTIN_PSRLQI128,
27486 IX86_BUILTIN_PSLLDQ128,
27487 IX86_BUILTIN_PSLLW128,
27488 IX86_BUILTIN_PSLLD128,
27489 IX86_BUILTIN_PSLLQ128,
27490 IX86_BUILTIN_PSRAW128,
27491 IX86_BUILTIN_PSRAD128,
27492 IX86_BUILTIN_PSRLW128,
27493 IX86_BUILTIN_PSRLD128,
27494 IX86_BUILTIN_PSRLQ128,
27496 IX86_BUILTIN_PUNPCKHBW128,
27497 IX86_BUILTIN_PUNPCKHWD128,
27498 IX86_BUILTIN_PUNPCKHDQ128,
27499 IX86_BUILTIN_PUNPCKHQDQ128,
27500 IX86_BUILTIN_PUNPCKLBW128,
27501 IX86_BUILTIN_PUNPCKLWD128,
27502 IX86_BUILTIN_PUNPCKLDQ128,
27503 IX86_BUILTIN_PUNPCKLQDQ128,
27505 IX86_BUILTIN_CLFLUSH,
27506 IX86_BUILTIN_MFENCE,
27507 IX86_BUILTIN_LFENCE,
27508 IX86_BUILTIN_PAUSE,
27510 IX86_BUILTIN_FNSTENV,
27511 IX86_BUILTIN_FLDENV,
27512 IX86_BUILTIN_FNSTSW,
27513 IX86_BUILTIN_FNCLEX,
27515 IX86_BUILTIN_BSRSI,
27516 IX86_BUILTIN_BSRDI,
27517 IX86_BUILTIN_RDPMC,
27518 IX86_BUILTIN_RDTSC,
27519 IX86_BUILTIN_RDTSCP,
27520 IX86_BUILTIN_ROLQI,
27521 IX86_BUILTIN_ROLHI,
27522 IX86_BUILTIN_RORQI,
27523 IX86_BUILTIN_RORHI,
27525 /* SSE3. */
27526 IX86_BUILTIN_ADDSUBPS,
27527 IX86_BUILTIN_HADDPS,
27528 IX86_BUILTIN_HSUBPS,
27529 IX86_BUILTIN_MOVSHDUP,
27530 IX86_BUILTIN_MOVSLDUP,
27531 IX86_BUILTIN_ADDSUBPD,
27532 IX86_BUILTIN_HADDPD,
27533 IX86_BUILTIN_HSUBPD,
27534 IX86_BUILTIN_LDDQU,
27536 IX86_BUILTIN_MONITOR,
27537 IX86_BUILTIN_MWAIT,
27539 /* SSSE3. */
27540 IX86_BUILTIN_PHADDW,
27541 IX86_BUILTIN_PHADDD,
27542 IX86_BUILTIN_PHADDSW,
27543 IX86_BUILTIN_PHSUBW,
27544 IX86_BUILTIN_PHSUBD,
27545 IX86_BUILTIN_PHSUBSW,
27546 IX86_BUILTIN_PMADDUBSW,
27547 IX86_BUILTIN_PMULHRSW,
27548 IX86_BUILTIN_PSHUFB,
27549 IX86_BUILTIN_PSIGNB,
27550 IX86_BUILTIN_PSIGNW,
27551 IX86_BUILTIN_PSIGND,
27552 IX86_BUILTIN_PALIGNR,
27553 IX86_BUILTIN_PABSB,
27554 IX86_BUILTIN_PABSW,
27555 IX86_BUILTIN_PABSD,
27557 IX86_BUILTIN_PHADDW128,
27558 IX86_BUILTIN_PHADDD128,
27559 IX86_BUILTIN_PHADDSW128,
27560 IX86_BUILTIN_PHSUBW128,
27561 IX86_BUILTIN_PHSUBD128,
27562 IX86_BUILTIN_PHSUBSW128,
27563 IX86_BUILTIN_PMADDUBSW128,
27564 IX86_BUILTIN_PMULHRSW128,
27565 IX86_BUILTIN_PSHUFB128,
27566 IX86_BUILTIN_PSIGNB128,
27567 IX86_BUILTIN_PSIGNW128,
27568 IX86_BUILTIN_PSIGND128,
27569 IX86_BUILTIN_PALIGNR128,
27570 IX86_BUILTIN_PABSB128,
27571 IX86_BUILTIN_PABSW128,
27572 IX86_BUILTIN_PABSD128,
27574 /* AMDFAM10 - SSE4A New Instructions. */
27575 IX86_BUILTIN_MOVNTSD,
27576 IX86_BUILTIN_MOVNTSS,
27577 IX86_BUILTIN_EXTRQI,
27578 IX86_BUILTIN_EXTRQ,
27579 IX86_BUILTIN_INSERTQI,
27580 IX86_BUILTIN_INSERTQ,
27582 /* SSE4.1. */
27583 IX86_BUILTIN_BLENDPD,
27584 IX86_BUILTIN_BLENDPS,
27585 IX86_BUILTIN_BLENDVPD,
27586 IX86_BUILTIN_BLENDVPS,
27587 IX86_BUILTIN_PBLENDVB128,
27588 IX86_BUILTIN_PBLENDW128,
27590 IX86_BUILTIN_DPPD,
27591 IX86_BUILTIN_DPPS,
27593 IX86_BUILTIN_INSERTPS128,
27595 IX86_BUILTIN_MOVNTDQA,
27596 IX86_BUILTIN_MPSADBW128,
27597 IX86_BUILTIN_PACKUSDW128,
27598 IX86_BUILTIN_PCMPEQQ,
27599 IX86_BUILTIN_PHMINPOSUW128,
27601 IX86_BUILTIN_PMAXSB128,
27602 IX86_BUILTIN_PMAXSD128,
27603 IX86_BUILTIN_PMAXUD128,
27604 IX86_BUILTIN_PMAXUW128,
27606 IX86_BUILTIN_PMINSB128,
27607 IX86_BUILTIN_PMINSD128,
27608 IX86_BUILTIN_PMINUD128,
27609 IX86_BUILTIN_PMINUW128,
27611 IX86_BUILTIN_PMOVSXBW128,
27612 IX86_BUILTIN_PMOVSXBD128,
27613 IX86_BUILTIN_PMOVSXBQ128,
27614 IX86_BUILTIN_PMOVSXWD128,
27615 IX86_BUILTIN_PMOVSXWQ128,
27616 IX86_BUILTIN_PMOVSXDQ128,
27618 IX86_BUILTIN_PMOVZXBW128,
27619 IX86_BUILTIN_PMOVZXBD128,
27620 IX86_BUILTIN_PMOVZXBQ128,
27621 IX86_BUILTIN_PMOVZXWD128,
27622 IX86_BUILTIN_PMOVZXWQ128,
27623 IX86_BUILTIN_PMOVZXDQ128,
27625 IX86_BUILTIN_PMULDQ128,
27626 IX86_BUILTIN_PMULLD128,
27628 IX86_BUILTIN_ROUNDSD,
27629 IX86_BUILTIN_ROUNDSS,
27631 IX86_BUILTIN_ROUNDPD,
27632 IX86_BUILTIN_ROUNDPS,
27634 IX86_BUILTIN_FLOORPD,
27635 IX86_BUILTIN_CEILPD,
27636 IX86_BUILTIN_TRUNCPD,
27637 IX86_BUILTIN_RINTPD,
27638 IX86_BUILTIN_ROUNDPD_AZ,
27640 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27641 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27642 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27644 IX86_BUILTIN_FLOORPS,
27645 IX86_BUILTIN_CEILPS,
27646 IX86_BUILTIN_TRUNCPS,
27647 IX86_BUILTIN_RINTPS,
27648 IX86_BUILTIN_ROUNDPS_AZ,
27650 IX86_BUILTIN_FLOORPS_SFIX,
27651 IX86_BUILTIN_CEILPS_SFIX,
27652 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27654 IX86_BUILTIN_PTESTZ,
27655 IX86_BUILTIN_PTESTC,
27656 IX86_BUILTIN_PTESTNZC,
27658 IX86_BUILTIN_VEC_INIT_V2SI,
27659 IX86_BUILTIN_VEC_INIT_V4HI,
27660 IX86_BUILTIN_VEC_INIT_V8QI,
27661 IX86_BUILTIN_VEC_EXT_V2DF,
27662 IX86_BUILTIN_VEC_EXT_V2DI,
27663 IX86_BUILTIN_VEC_EXT_V4SF,
27664 IX86_BUILTIN_VEC_EXT_V4SI,
27665 IX86_BUILTIN_VEC_EXT_V8HI,
27666 IX86_BUILTIN_VEC_EXT_V2SI,
27667 IX86_BUILTIN_VEC_EXT_V4HI,
27668 IX86_BUILTIN_VEC_EXT_V16QI,
27669 IX86_BUILTIN_VEC_SET_V2DI,
27670 IX86_BUILTIN_VEC_SET_V4SF,
27671 IX86_BUILTIN_VEC_SET_V4SI,
27672 IX86_BUILTIN_VEC_SET_V8HI,
27673 IX86_BUILTIN_VEC_SET_V4HI,
27674 IX86_BUILTIN_VEC_SET_V16QI,
27676 IX86_BUILTIN_VEC_PACK_SFIX,
27677 IX86_BUILTIN_VEC_PACK_SFIX256,
27679 /* SSE4.2. */
27680 IX86_BUILTIN_CRC32QI,
27681 IX86_BUILTIN_CRC32HI,
27682 IX86_BUILTIN_CRC32SI,
27683 IX86_BUILTIN_CRC32DI,
27685 IX86_BUILTIN_PCMPESTRI128,
27686 IX86_BUILTIN_PCMPESTRM128,
27687 IX86_BUILTIN_PCMPESTRA128,
27688 IX86_BUILTIN_PCMPESTRC128,
27689 IX86_BUILTIN_PCMPESTRO128,
27690 IX86_BUILTIN_PCMPESTRS128,
27691 IX86_BUILTIN_PCMPESTRZ128,
27692 IX86_BUILTIN_PCMPISTRI128,
27693 IX86_BUILTIN_PCMPISTRM128,
27694 IX86_BUILTIN_PCMPISTRA128,
27695 IX86_BUILTIN_PCMPISTRC128,
27696 IX86_BUILTIN_PCMPISTRO128,
27697 IX86_BUILTIN_PCMPISTRS128,
27698 IX86_BUILTIN_PCMPISTRZ128,
27700 IX86_BUILTIN_PCMPGTQ,
27702 /* AES instructions */
27703 IX86_BUILTIN_AESENC128,
27704 IX86_BUILTIN_AESENCLAST128,
27705 IX86_BUILTIN_AESDEC128,
27706 IX86_BUILTIN_AESDECLAST128,
27707 IX86_BUILTIN_AESIMC128,
27708 IX86_BUILTIN_AESKEYGENASSIST128,
27710 /* PCLMUL instruction */
27711 IX86_BUILTIN_PCLMULQDQ128,
27713 /* AVX */
27714 IX86_BUILTIN_ADDPD256,
27715 IX86_BUILTIN_ADDPS256,
27716 IX86_BUILTIN_ADDSUBPD256,
27717 IX86_BUILTIN_ADDSUBPS256,
27718 IX86_BUILTIN_ANDPD256,
27719 IX86_BUILTIN_ANDPS256,
27720 IX86_BUILTIN_ANDNPD256,
27721 IX86_BUILTIN_ANDNPS256,
27722 IX86_BUILTIN_BLENDPD256,
27723 IX86_BUILTIN_BLENDPS256,
27724 IX86_BUILTIN_BLENDVPD256,
27725 IX86_BUILTIN_BLENDVPS256,
27726 IX86_BUILTIN_DIVPD256,
27727 IX86_BUILTIN_DIVPS256,
27728 IX86_BUILTIN_DPPS256,
27729 IX86_BUILTIN_HADDPD256,
27730 IX86_BUILTIN_HADDPS256,
27731 IX86_BUILTIN_HSUBPD256,
27732 IX86_BUILTIN_HSUBPS256,
27733 IX86_BUILTIN_MAXPD256,
27734 IX86_BUILTIN_MAXPS256,
27735 IX86_BUILTIN_MINPD256,
27736 IX86_BUILTIN_MINPS256,
27737 IX86_BUILTIN_MULPD256,
27738 IX86_BUILTIN_MULPS256,
27739 IX86_BUILTIN_ORPD256,
27740 IX86_BUILTIN_ORPS256,
27741 IX86_BUILTIN_SHUFPD256,
27742 IX86_BUILTIN_SHUFPS256,
27743 IX86_BUILTIN_SUBPD256,
27744 IX86_BUILTIN_SUBPS256,
27745 IX86_BUILTIN_XORPD256,
27746 IX86_BUILTIN_XORPS256,
27747 IX86_BUILTIN_CMPSD,
27748 IX86_BUILTIN_CMPSS,
27749 IX86_BUILTIN_CMPPD,
27750 IX86_BUILTIN_CMPPS,
27751 IX86_BUILTIN_CMPPD256,
27752 IX86_BUILTIN_CMPPS256,
27753 IX86_BUILTIN_CVTDQ2PD256,
27754 IX86_BUILTIN_CVTDQ2PS256,
27755 IX86_BUILTIN_CVTPD2PS256,
27756 IX86_BUILTIN_CVTPS2DQ256,
27757 IX86_BUILTIN_CVTPS2PD256,
27758 IX86_BUILTIN_CVTTPD2DQ256,
27759 IX86_BUILTIN_CVTPD2DQ256,
27760 IX86_BUILTIN_CVTTPS2DQ256,
27761 IX86_BUILTIN_EXTRACTF128PD256,
27762 IX86_BUILTIN_EXTRACTF128PS256,
27763 IX86_BUILTIN_EXTRACTF128SI256,
27764 IX86_BUILTIN_VZEROALL,
27765 IX86_BUILTIN_VZEROUPPER,
27766 IX86_BUILTIN_VPERMILVARPD,
27767 IX86_BUILTIN_VPERMILVARPS,
27768 IX86_BUILTIN_VPERMILVARPD256,
27769 IX86_BUILTIN_VPERMILVARPS256,
27770 IX86_BUILTIN_VPERMILPD,
27771 IX86_BUILTIN_VPERMILPS,
27772 IX86_BUILTIN_VPERMILPD256,
27773 IX86_BUILTIN_VPERMILPS256,
27774 IX86_BUILTIN_VPERMIL2PD,
27775 IX86_BUILTIN_VPERMIL2PS,
27776 IX86_BUILTIN_VPERMIL2PD256,
27777 IX86_BUILTIN_VPERMIL2PS256,
27778 IX86_BUILTIN_VPERM2F128PD256,
27779 IX86_BUILTIN_VPERM2F128PS256,
27780 IX86_BUILTIN_VPERM2F128SI256,
27781 IX86_BUILTIN_VBROADCASTSS,
27782 IX86_BUILTIN_VBROADCASTSD256,
27783 IX86_BUILTIN_VBROADCASTSS256,
27784 IX86_BUILTIN_VBROADCASTPD256,
27785 IX86_BUILTIN_VBROADCASTPS256,
27786 IX86_BUILTIN_VINSERTF128PD256,
27787 IX86_BUILTIN_VINSERTF128PS256,
27788 IX86_BUILTIN_VINSERTF128SI256,
27789 IX86_BUILTIN_LOADUPD256,
27790 IX86_BUILTIN_LOADUPS256,
27791 IX86_BUILTIN_STOREUPD256,
27792 IX86_BUILTIN_STOREUPS256,
27793 IX86_BUILTIN_LDDQU256,
27794 IX86_BUILTIN_MOVNTDQ256,
27795 IX86_BUILTIN_MOVNTPD256,
27796 IX86_BUILTIN_MOVNTPS256,
27797 IX86_BUILTIN_LOADDQU256,
27798 IX86_BUILTIN_STOREDQU256,
27799 IX86_BUILTIN_MASKLOADPD,
27800 IX86_BUILTIN_MASKLOADPS,
27801 IX86_BUILTIN_MASKSTOREPD,
27802 IX86_BUILTIN_MASKSTOREPS,
27803 IX86_BUILTIN_MASKLOADPD256,
27804 IX86_BUILTIN_MASKLOADPS256,
27805 IX86_BUILTIN_MASKSTOREPD256,
27806 IX86_BUILTIN_MASKSTOREPS256,
27807 IX86_BUILTIN_MOVSHDUP256,
27808 IX86_BUILTIN_MOVSLDUP256,
27809 IX86_BUILTIN_MOVDDUP256,
27811 IX86_BUILTIN_SQRTPD256,
27812 IX86_BUILTIN_SQRTPS256,
27813 IX86_BUILTIN_SQRTPS_NR256,
27814 IX86_BUILTIN_RSQRTPS256,
27815 IX86_BUILTIN_RSQRTPS_NR256,
27817 IX86_BUILTIN_RCPPS256,
27819 IX86_BUILTIN_ROUNDPD256,
27820 IX86_BUILTIN_ROUNDPS256,
27822 IX86_BUILTIN_FLOORPD256,
27823 IX86_BUILTIN_CEILPD256,
27824 IX86_BUILTIN_TRUNCPD256,
27825 IX86_BUILTIN_RINTPD256,
27826 IX86_BUILTIN_ROUNDPD_AZ256,
27828 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27829 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27830 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27832 IX86_BUILTIN_FLOORPS256,
27833 IX86_BUILTIN_CEILPS256,
27834 IX86_BUILTIN_TRUNCPS256,
27835 IX86_BUILTIN_RINTPS256,
27836 IX86_BUILTIN_ROUNDPS_AZ256,
27838 IX86_BUILTIN_FLOORPS_SFIX256,
27839 IX86_BUILTIN_CEILPS_SFIX256,
27840 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27842 IX86_BUILTIN_UNPCKHPD256,
27843 IX86_BUILTIN_UNPCKLPD256,
27844 IX86_BUILTIN_UNPCKHPS256,
27845 IX86_BUILTIN_UNPCKLPS256,
27847 IX86_BUILTIN_SI256_SI,
27848 IX86_BUILTIN_PS256_PS,
27849 IX86_BUILTIN_PD256_PD,
27850 IX86_BUILTIN_SI_SI256,
27851 IX86_BUILTIN_PS_PS256,
27852 IX86_BUILTIN_PD_PD256,
27854 IX86_BUILTIN_VTESTZPD,
27855 IX86_BUILTIN_VTESTCPD,
27856 IX86_BUILTIN_VTESTNZCPD,
27857 IX86_BUILTIN_VTESTZPS,
27858 IX86_BUILTIN_VTESTCPS,
27859 IX86_BUILTIN_VTESTNZCPS,
27860 IX86_BUILTIN_VTESTZPD256,
27861 IX86_BUILTIN_VTESTCPD256,
27862 IX86_BUILTIN_VTESTNZCPD256,
27863 IX86_BUILTIN_VTESTZPS256,
27864 IX86_BUILTIN_VTESTCPS256,
27865 IX86_BUILTIN_VTESTNZCPS256,
27866 IX86_BUILTIN_PTESTZ256,
27867 IX86_BUILTIN_PTESTC256,
27868 IX86_BUILTIN_PTESTNZC256,
27870 IX86_BUILTIN_MOVMSKPD256,
27871 IX86_BUILTIN_MOVMSKPS256,
27873 /* AVX2 */
27874 IX86_BUILTIN_MPSADBW256,
27875 IX86_BUILTIN_PABSB256,
27876 IX86_BUILTIN_PABSW256,
27877 IX86_BUILTIN_PABSD256,
27878 IX86_BUILTIN_PACKSSDW256,
27879 IX86_BUILTIN_PACKSSWB256,
27880 IX86_BUILTIN_PACKUSDW256,
27881 IX86_BUILTIN_PACKUSWB256,
27882 IX86_BUILTIN_PADDB256,
27883 IX86_BUILTIN_PADDW256,
27884 IX86_BUILTIN_PADDD256,
27885 IX86_BUILTIN_PADDQ256,
27886 IX86_BUILTIN_PADDSB256,
27887 IX86_BUILTIN_PADDSW256,
27888 IX86_BUILTIN_PADDUSB256,
27889 IX86_BUILTIN_PADDUSW256,
27890 IX86_BUILTIN_PALIGNR256,
27891 IX86_BUILTIN_AND256I,
27892 IX86_BUILTIN_ANDNOT256I,
27893 IX86_BUILTIN_PAVGB256,
27894 IX86_BUILTIN_PAVGW256,
27895 IX86_BUILTIN_PBLENDVB256,
27896 IX86_BUILTIN_PBLENDVW256,
27897 IX86_BUILTIN_PCMPEQB256,
27898 IX86_BUILTIN_PCMPEQW256,
27899 IX86_BUILTIN_PCMPEQD256,
27900 IX86_BUILTIN_PCMPEQQ256,
27901 IX86_BUILTIN_PCMPGTB256,
27902 IX86_BUILTIN_PCMPGTW256,
27903 IX86_BUILTIN_PCMPGTD256,
27904 IX86_BUILTIN_PCMPGTQ256,
27905 IX86_BUILTIN_PHADDW256,
27906 IX86_BUILTIN_PHADDD256,
27907 IX86_BUILTIN_PHADDSW256,
27908 IX86_BUILTIN_PHSUBW256,
27909 IX86_BUILTIN_PHSUBD256,
27910 IX86_BUILTIN_PHSUBSW256,
27911 IX86_BUILTIN_PMADDUBSW256,
27912 IX86_BUILTIN_PMADDWD256,
27913 IX86_BUILTIN_PMAXSB256,
27914 IX86_BUILTIN_PMAXSW256,
27915 IX86_BUILTIN_PMAXSD256,
27916 IX86_BUILTIN_PMAXUB256,
27917 IX86_BUILTIN_PMAXUW256,
27918 IX86_BUILTIN_PMAXUD256,
27919 IX86_BUILTIN_PMINSB256,
27920 IX86_BUILTIN_PMINSW256,
27921 IX86_BUILTIN_PMINSD256,
27922 IX86_BUILTIN_PMINUB256,
27923 IX86_BUILTIN_PMINUW256,
27924 IX86_BUILTIN_PMINUD256,
27925 IX86_BUILTIN_PMOVMSKB256,
27926 IX86_BUILTIN_PMOVSXBW256,
27927 IX86_BUILTIN_PMOVSXBD256,
27928 IX86_BUILTIN_PMOVSXBQ256,
27929 IX86_BUILTIN_PMOVSXWD256,
27930 IX86_BUILTIN_PMOVSXWQ256,
27931 IX86_BUILTIN_PMOVSXDQ256,
27932 IX86_BUILTIN_PMOVZXBW256,
27933 IX86_BUILTIN_PMOVZXBD256,
27934 IX86_BUILTIN_PMOVZXBQ256,
27935 IX86_BUILTIN_PMOVZXWD256,
27936 IX86_BUILTIN_PMOVZXWQ256,
27937 IX86_BUILTIN_PMOVZXDQ256,
27938 IX86_BUILTIN_PMULDQ256,
27939 IX86_BUILTIN_PMULHRSW256,
27940 IX86_BUILTIN_PMULHUW256,
27941 IX86_BUILTIN_PMULHW256,
27942 IX86_BUILTIN_PMULLW256,
27943 IX86_BUILTIN_PMULLD256,
27944 IX86_BUILTIN_PMULUDQ256,
27945 IX86_BUILTIN_POR256,
27946 IX86_BUILTIN_PSADBW256,
27947 IX86_BUILTIN_PSHUFB256,
27948 IX86_BUILTIN_PSHUFD256,
27949 IX86_BUILTIN_PSHUFHW256,
27950 IX86_BUILTIN_PSHUFLW256,
27951 IX86_BUILTIN_PSIGNB256,
27952 IX86_BUILTIN_PSIGNW256,
27953 IX86_BUILTIN_PSIGND256,
27954 IX86_BUILTIN_PSLLDQI256,
27955 IX86_BUILTIN_PSLLWI256,
27956 IX86_BUILTIN_PSLLW256,
27957 IX86_BUILTIN_PSLLDI256,
27958 IX86_BUILTIN_PSLLD256,
27959 IX86_BUILTIN_PSLLQI256,
27960 IX86_BUILTIN_PSLLQ256,
27961 IX86_BUILTIN_PSRAWI256,
27962 IX86_BUILTIN_PSRAW256,
27963 IX86_BUILTIN_PSRADI256,
27964 IX86_BUILTIN_PSRAD256,
27965 IX86_BUILTIN_PSRLDQI256,
27966 IX86_BUILTIN_PSRLWI256,
27967 IX86_BUILTIN_PSRLW256,
27968 IX86_BUILTIN_PSRLDI256,
27969 IX86_BUILTIN_PSRLD256,
27970 IX86_BUILTIN_PSRLQI256,
27971 IX86_BUILTIN_PSRLQ256,
27972 IX86_BUILTIN_PSUBB256,
27973 IX86_BUILTIN_PSUBW256,
27974 IX86_BUILTIN_PSUBD256,
27975 IX86_BUILTIN_PSUBQ256,
27976 IX86_BUILTIN_PSUBSB256,
27977 IX86_BUILTIN_PSUBSW256,
27978 IX86_BUILTIN_PSUBUSB256,
27979 IX86_BUILTIN_PSUBUSW256,
27980 IX86_BUILTIN_PUNPCKHBW256,
27981 IX86_BUILTIN_PUNPCKHWD256,
27982 IX86_BUILTIN_PUNPCKHDQ256,
27983 IX86_BUILTIN_PUNPCKHQDQ256,
27984 IX86_BUILTIN_PUNPCKLBW256,
27985 IX86_BUILTIN_PUNPCKLWD256,
27986 IX86_BUILTIN_PUNPCKLDQ256,
27987 IX86_BUILTIN_PUNPCKLQDQ256,
27988 IX86_BUILTIN_PXOR256,
27989 IX86_BUILTIN_MOVNTDQA256,
27990 IX86_BUILTIN_VBROADCASTSS_PS,
27991 IX86_BUILTIN_VBROADCASTSS_PS256,
27992 IX86_BUILTIN_VBROADCASTSD_PD256,
27993 IX86_BUILTIN_VBROADCASTSI256,
27994 IX86_BUILTIN_PBLENDD256,
27995 IX86_BUILTIN_PBLENDD128,
27996 IX86_BUILTIN_PBROADCASTB256,
27997 IX86_BUILTIN_PBROADCASTW256,
27998 IX86_BUILTIN_PBROADCASTD256,
27999 IX86_BUILTIN_PBROADCASTQ256,
28000 IX86_BUILTIN_PBROADCASTB128,
28001 IX86_BUILTIN_PBROADCASTW128,
28002 IX86_BUILTIN_PBROADCASTD128,
28003 IX86_BUILTIN_PBROADCASTQ128,
28004 IX86_BUILTIN_VPERMVARSI256,
28005 IX86_BUILTIN_VPERMDF256,
28006 IX86_BUILTIN_VPERMVARSF256,
28007 IX86_BUILTIN_VPERMDI256,
28008 IX86_BUILTIN_VPERMTI256,
28009 IX86_BUILTIN_VEXTRACT128I256,
28010 IX86_BUILTIN_VINSERT128I256,
28011 IX86_BUILTIN_MASKLOADD,
28012 IX86_BUILTIN_MASKLOADQ,
28013 IX86_BUILTIN_MASKLOADD256,
28014 IX86_BUILTIN_MASKLOADQ256,
28015 IX86_BUILTIN_MASKSTORED,
28016 IX86_BUILTIN_MASKSTOREQ,
28017 IX86_BUILTIN_MASKSTORED256,
28018 IX86_BUILTIN_MASKSTOREQ256,
28019 IX86_BUILTIN_PSLLVV4DI,
28020 IX86_BUILTIN_PSLLVV2DI,
28021 IX86_BUILTIN_PSLLVV8SI,
28022 IX86_BUILTIN_PSLLVV4SI,
28023 IX86_BUILTIN_PSRAVV8SI,
28024 IX86_BUILTIN_PSRAVV4SI,
28025 IX86_BUILTIN_PSRLVV4DI,
28026 IX86_BUILTIN_PSRLVV2DI,
28027 IX86_BUILTIN_PSRLVV8SI,
28028 IX86_BUILTIN_PSRLVV4SI,
28030 IX86_BUILTIN_GATHERSIV2DF,
28031 IX86_BUILTIN_GATHERSIV4DF,
28032 IX86_BUILTIN_GATHERDIV2DF,
28033 IX86_BUILTIN_GATHERDIV4DF,
28034 IX86_BUILTIN_GATHERSIV4SF,
28035 IX86_BUILTIN_GATHERSIV8SF,
28036 IX86_BUILTIN_GATHERDIV4SF,
28037 IX86_BUILTIN_GATHERDIV8SF,
28038 IX86_BUILTIN_GATHERSIV2DI,
28039 IX86_BUILTIN_GATHERSIV4DI,
28040 IX86_BUILTIN_GATHERDIV2DI,
28041 IX86_BUILTIN_GATHERDIV4DI,
28042 IX86_BUILTIN_GATHERSIV4SI,
28043 IX86_BUILTIN_GATHERSIV8SI,
28044 IX86_BUILTIN_GATHERDIV4SI,
28045 IX86_BUILTIN_GATHERDIV8SI,
28047 /* AVX512F */
28048 IX86_BUILTIN_ADDPD512,
28049 IX86_BUILTIN_ADDPS512,
28050 IX86_BUILTIN_ADDSD_ROUND,
28051 IX86_BUILTIN_ADDSS_ROUND,
28052 IX86_BUILTIN_ALIGND512,
28053 IX86_BUILTIN_ALIGNQ512,
28054 IX86_BUILTIN_BLENDMD512,
28055 IX86_BUILTIN_BLENDMPD512,
28056 IX86_BUILTIN_BLENDMPS512,
28057 IX86_BUILTIN_BLENDMQ512,
28058 IX86_BUILTIN_BROADCASTF32X4_512,
28059 IX86_BUILTIN_BROADCASTF64X4_512,
28060 IX86_BUILTIN_BROADCASTI32X4_512,
28061 IX86_BUILTIN_BROADCASTI64X4_512,
28062 IX86_BUILTIN_BROADCASTSD512,
28063 IX86_BUILTIN_BROADCASTSS512,
28064 IX86_BUILTIN_CMPD512,
28065 IX86_BUILTIN_CMPPD512,
28066 IX86_BUILTIN_CMPPS512,
28067 IX86_BUILTIN_CMPQ512,
28068 IX86_BUILTIN_CMPSD_MASK,
28069 IX86_BUILTIN_CMPSS_MASK,
28070 IX86_BUILTIN_COMIDF,
28071 IX86_BUILTIN_COMISF,
28072 IX86_BUILTIN_COMPRESSPD512,
28073 IX86_BUILTIN_COMPRESSPDSTORE512,
28074 IX86_BUILTIN_COMPRESSPS512,
28075 IX86_BUILTIN_COMPRESSPSSTORE512,
28076 IX86_BUILTIN_CVTDQ2PD512,
28077 IX86_BUILTIN_CVTDQ2PS512,
28078 IX86_BUILTIN_CVTPD2DQ512,
28079 IX86_BUILTIN_CVTPD2PS512,
28080 IX86_BUILTIN_CVTPD2UDQ512,
28081 IX86_BUILTIN_CVTPH2PS512,
28082 IX86_BUILTIN_CVTPS2DQ512,
28083 IX86_BUILTIN_CVTPS2PD512,
28084 IX86_BUILTIN_CVTPS2PH512,
28085 IX86_BUILTIN_CVTPS2UDQ512,
28086 IX86_BUILTIN_CVTSD2SS_ROUND,
28087 IX86_BUILTIN_CVTSI2SD64,
28088 IX86_BUILTIN_CVTSI2SS32,
28089 IX86_BUILTIN_CVTSI2SS64,
28090 IX86_BUILTIN_CVTSS2SD_ROUND,
28091 IX86_BUILTIN_CVTTPD2DQ512,
28092 IX86_BUILTIN_CVTTPD2UDQ512,
28093 IX86_BUILTIN_CVTTPS2DQ512,
28094 IX86_BUILTIN_CVTTPS2UDQ512,
28095 IX86_BUILTIN_CVTUDQ2PD512,
28096 IX86_BUILTIN_CVTUDQ2PS512,
28097 IX86_BUILTIN_CVTUSI2SD32,
28098 IX86_BUILTIN_CVTUSI2SD64,
28099 IX86_BUILTIN_CVTUSI2SS32,
28100 IX86_BUILTIN_CVTUSI2SS64,
28101 IX86_BUILTIN_DIVPD512,
28102 IX86_BUILTIN_DIVPS512,
28103 IX86_BUILTIN_DIVSD_ROUND,
28104 IX86_BUILTIN_DIVSS_ROUND,
28105 IX86_BUILTIN_EXPANDPD512,
28106 IX86_BUILTIN_EXPANDPD512Z,
28107 IX86_BUILTIN_EXPANDPDLOAD512,
28108 IX86_BUILTIN_EXPANDPDLOAD512Z,
28109 IX86_BUILTIN_EXPANDPS512,
28110 IX86_BUILTIN_EXPANDPS512Z,
28111 IX86_BUILTIN_EXPANDPSLOAD512,
28112 IX86_BUILTIN_EXPANDPSLOAD512Z,
28113 IX86_BUILTIN_EXTRACTF32X4,
28114 IX86_BUILTIN_EXTRACTF64X4,
28115 IX86_BUILTIN_EXTRACTI32X4,
28116 IX86_BUILTIN_EXTRACTI64X4,
28117 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28118 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28119 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28120 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28121 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28122 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28123 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28124 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28125 IX86_BUILTIN_GETEXPPD512,
28126 IX86_BUILTIN_GETEXPPS512,
28127 IX86_BUILTIN_GETEXPSD128,
28128 IX86_BUILTIN_GETEXPSS128,
28129 IX86_BUILTIN_GETMANTPD512,
28130 IX86_BUILTIN_GETMANTPS512,
28131 IX86_BUILTIN_GETMANTSD128,
28132 IX86_BUILTIN_GETMANTSS128,
28133 IX86_BUILTIN_INSERTF32X4,
28134 IX86_BUILTIN_INSERTF64X4,
28135 IX86_BUILTIN_INSERTI32X4,
28136 IX86_BUILTIN_INSERTI64X4,
28137 IX86_BUILTIN_LOADAPD512,
28138 IX86_BUILTIN_LOADAPS512,
28139 IX86_BUILTIN_LOADDQUDI512,
28140 IX86_BUILTIN_LOADDQUSI512,
28141 IX86_BUILTIN_LOADUPD512,
28142 IX86_BUILTIN_LOADUPS512,
28143 IX86_BUILTIN_MAXPD512,
28144 IX86_BUILTIN_MAXPS512,
28145 IX86_BUILTIN_MAXSD_ROUND,
28146 IX86_BUILTIN_MAXSS_ROUND,
28147 IX86_BUILTIN_MINPD512,
28148 IX86_BUILTIN_MINPS512,
28149 IX86_BUILTIN_MINSD_ROUND,
28150 IX86_BUILTIN_MINSS_ROUND,
28151 IX86_BUILTIN_MOVAPD512,
28152 IX86_BUILTIN_MOVAPS512,
28153 IX86_BUILTIN_MOVDDUP512,
28154 IX86_BUILTIN_MOVDQA32LOAD512,
28155 IX86_BUILTIN_MOVDQA32STORE512,
28156 IX86_BUILTIN_MOVDQA32_512,
28157 IX86_BUILTIN_MOVDQA64LOAD512,
28158 IX86_BUILTIN_MOVDQA64STORE512,
28159 IX86_BUILTIN_MOVDQA64_512,
28160 IX86_BUILTIN_MOVNTDQ512,
28161 IX86_BUILTIN_MOVNTDQA512,
28162 IX86_BUILTIN_MOVNTPD512,
28163 IX86_BUILTIN_MOVNTPS512,
28164 IX86_BUILTIN_MOVSHDUP512,
28165 IX86_BUILTIN_MOVSLDUP512,
28166 IX86_BUILTIN_MULPD512,
28167 IX86_BUILTIN_MULPS512,
28168 IX86_BUILTIN_MULSD_ROUND,
28169 IX86_BUILTIN_MULSS_ROUND,
28170 IX86_BUILTIN_PABSD512,
28171 IX86_BUILTIN_PABSQ512,
28172 IX86_BUILTIN_PADDD512,
28173 IX86_BUILTIN_PADDQ512,
28174 IX86_BUILTIN_PANDD512,
28175 IX86_BUILTIN_PANDND512,
28176 IX86_BUILTIN_PANDNQ512,
28177 IX86_BUILTIN_PANDQ512,
28178 IX86_BUILTIN_PBROADCASTD512,
28179 IX86_BUILTIN_PBROADCASTD512_GPR,
28180 IX86_BUILTIN_PBROADCASTMB512,
28181 IX86_BUILTIN_PBROADCASTMW512,
28182 IX86_BUILTIN_PBROADCASTQ512,
28183 IX86_BUILTIN_PBROADCASTQ512_GPR,
28184 IX86_BUILTIN_PBROADCASTQ512_MEM,
28185 IX86_BUILTIN_PCMPEQD512_MASK,
28186 IX86_BUILTIN_PCMPEQQ512_MASK,
28187 IX86_BUILTIN_PCMPGTD512_MASK,
28188 IX86_BUILTIN_PCMPGTQ512_MASK,
28189 IX86_BUILTIN_PCOMPRESSD512,
28190 IX86_BUILTIN_PCOMPRESSDSTORE512,
28191 IX86_BUILTIN_PCOMPRESSQ512,
28192 IX86_BUILTIN_PCOMPRESSQSTORE512,
28193 IX86_BUILTIN_PEXPANDD512,
28194 IX86_BUILTIN_PEXPANDD512Z,
28195 IX86_BUILTIN_PEXPANDDLOAD512,
28196 IX86_BUILTIN_PEXPANDDLOAD512Z,
28197 IX86_BUILTIN_PEXPANDQ512,
28198 IX86_BUILTIN_PEXPANDQ512Z,
28199 IX86_BUILTIN_PEXPANDQLOAD512,
28200 IX86_BUILTIN_PEXPANDQLOAD512Z,
28201 IX86_BUILTIN_PMAXSD512,
28202 IX86_BUILTIN_PMAXSQ512,
28203 IX86_BUILTIN_PMAXUD512,
28204 IX86_BUILTIN_PMAXUQ512,
28205 IX86_BUILTIN_PMINSD512,
28206 IX86_BUILTIN_PMINSQ512,
28207 IX86_BUILTIN_PMINUD512,
28208 IX86_BUILTIN_PMINUQ512,
28209 IX86_BUILTIN_PMOVDB512,
28210 IX86_BUILTIN_PMOVDB512_MEM,
28211 IX86_BUILTIN_PMOVDW512,
28212 IX86_BUILTIN_PMOVDW512_MEM,
28213 IX86_BUILTIN_PMOVQB512,
28214 IX86_BUILTIN_PMOVQB512_MEM,
28215 IX86_BUILTIN_PMOVQD512,
28216 IX86_BUILTIN_PMOVQD512_MEM,
28217 IX86_BUILTIN_PMOVQW512,
28218 IX86_BUILTIN_PMOVQW512_MEM,
28219 IX86_BUILTIN_PMOVSDB512,
28220 IX86_BUILTIN_PMOVSDB512_MEM,
28221 IX86_BUILTIN_PMOVSDW512,
28222 IX86_BUILTIN_PMOVSDW512_MEM,
28223 IX86_BUILTIN_PMOVSQB512,
28224 IX86_BUILTIN_PMOVSQB512_MEM,
28225 IX86_BUILTIN_PMOVSQD512,
28226 IX86_BUILTIN_PMOVSQD512_MEM,
28227 IX86_BUILTIN_PMOVSQW512,
28228 IX86_BUILTIN_PMOVSQW512_MEM,
28229 IX86_BUILTIN_PMOVSXBD512,
28230 IX86_BUILTIN_PMOVSXBQ512,
28231 IX86_BUILTIN_PMOVSXDQ512,
28232 IX86_BUILTIN_PMOVSXWD512,
28233 IX86_BUILTIN_PMOVSXWQ512,
28234 IX86_BUILTIN_PMOVUSDB512,
28235 IX86_BUILTIN_PMOVUSDB512_MEM,
28236 IX86_BUILTIN_PMOVUSDW512,
28237 IX86_BUILTIN_PMOVUSDW512_MEM,
28238 IX86_BUILTIN_PMOVUSQB512,
28239 IX86_BUILTIN_PMOVUSQB512_MEM,
28240 IX86_BUILTIN_PMOVUSQD512,
28241 IX86_BUILTIN_PMOVUSQD512_MEM,
28242 IX86_BUILTIN_PMOVUSQW512,
28243 IX86_BUILTIN_PMOVUSQW512_MEM,
28244 IX86_BUILTIN_PMOVZXBD512,
28245 IX86_BUILTIN_PMOVZXBQ512,
28246 IX86_BUILTIN_PMOVZXDQ512,
28247 IX86_BUILTIN_PMOVZXWD512,
28248 IX86_BUILTIN_PMOVZXWQ512,
28249 IX86_BUILTIN_PMULDQ512,
28250 IX86_BUILTIN_PMULLD512,
28251 IX86_BUILTIN_PMULUDQ512,
28252 IX86_BUILTIN_PORD512,
28253 IX86_BUILTIN_PORQ512,
28254 IX86_BUILTIN_PROLD512,
28255 IX86_BUILTIN_PROLQ512,
28256 IX86_BUILTIN_PROLVD512,
28257 IX86_BUILTIN_PROLVQ512,
28258 IX86_BUILTIN_PRORD512,
28259 IX86_BUILTIN_PRORQ512,
28260 IX86_BUILTIN_PRORVD512,
28261 IX86_BUILTIN_PRORVQ512,
28262 IX86_BUILTIN_PSHUFD512,
28263 IX86_BUILTIN_PSLLD512,
28264 IX86_BUILTIN_PSLLDI512,
28265 IX86_BUILTIN_PSLLQ512,
28266 IX86_BUILTIN_PSLLQI512,
28267 IX86_BUILTIN_PSLLVV16SI,
28268 IX86_BUILTIN_PSLLVV8DI,
28269 IX86_BUILTIN_PSRAD512,
28270 IX86_BUILTIN_PSRADI512,
28271 IX86_BUILTIN_PSRAQ512,
28272 IX86_BUILTIN_PSRAQI512,
28273 IX86_BUILTIN_PSRAVV16SI,
28274 IX86_BUILTIN_PSRAVV8DI,
28275 IX86_BUILTIN_PSRLD512,
28276 IX86_BUILTIN_PSRLDI512,
28277 IX86_BUILTIN_PSRLQ512,
28278 IX86_BUILTIN_PSRLQI512,
28279 IX86_BUILTIN_PSRLVV16SI,
28280 IX86_BUILTIN_PSRLVV8DI,
28281 IX86_BUILTIN_PSUBD512,
28282 IX86_BUILTIN_PSUBQ512,
28283 IX86_BUILTIN_PTESTMD512,
28284 IX86_BUILTIN_PTESTMQ512,
28285 IX86_BUILTIN_PTESTNMD512,
28286 IX86_BUILTIN_PTESTNMQ512,
28287 IX86_BUILTIN_PUNPCKHDQ512,
28288 IX86_BUILTIN_PUNPCKHQDQ512,
28289 IX86_BUILTIN_PUNPCKLDQ512,
28290 IX86_BUILTIN_PUNPCKLQDQ512,
28291 IX86_BUILTIN_PXORD512,
28292 IX86_BUILTIN_PXORQ512,
28293 IX86_BUILTIN_RCP14PD512,
28294 IX86_BUILTIN_RCP14PS512,
28295 IX86_BUILTIN_RCP14SD,
28296 IX86_BUILTIN_RCP14SS,
28297 IX86_BUILTIN_RNDSCALEPD,
28298 IX86_BUILTIN_RNDSCALEPS,
28299 IX86_BUILTIN_RNDSCALESD,
28300 IX86_BUILTIN_RNDSCALESS,
28301 IX86_BUILTIN_RSQRT14PD512,
28302 IX86_BUILTIN_RSQRT14PS512,
28303 IX86_BUILTIN_RSQRT14SD,
28304 IX86_BUILTIN_RSQRT14SS,
28305 IX86_BUILTIN_SCALEFPD512,
28306 IX86_BUILTIN_SCALEFPS512,
28307 IX86_BUILTIN_SCALEFSD,
28308 IX86_BUILTIN_SCALEFSS,
28309 IX86_BUILTIN_SHUFPD512,
28310 IX86_BUILTIN_SHUFPS512,
28311 IX86_BUILTIN_SHUF_F32x4,
28312 IX86_BUILTIN_SHUF_F64x2,
28313 IX86_BUILTIN_SHUF_I32x4,
28314 IX86_BUILTIN_SHUF_I64x2,
28315 IX86_BUILTIN_SQRTPD512,
28316 IX86_BUILTIN_SQRTPD512_MASK,
28317 IX86_BUILTIN_SQRTPS512_MASK,
28318 IX86_BUILTIN_SQRTPS_NR512,
28319 IX86_BUILTIN_SQRTSD_ROUND,
28320 IX86_BUILTIN_SQRTSS_ROUND,
28321 IX86_BUILTIN_STOREAPD512,
28322 IX86_BUILTIN_STOREAPS512,
28323 IX86_BUILTIN_STOREDQUDI512,
28324 IX86_BUILTIN_STOREDQUSI512,
28325 IX86_BUILTIN_STOREUPD512,
28326 IX86_BUILTIN_STOREUPS512,
28327 IX86_BUILTIN_SUBPD512,
28328 IX86_BUILTIN_SUBPS512,
28329 IX86_BUILTIN_SUBSD_ROUND,
28330 IX86_BUILTIN_SUBSS_ROUND,
28331 IX86_BUILTIN_UCMPD512,
28332 IX86_BUILTIN_UCMPQ512,
28333 IX86_BUILTIN_UNPCKHPD512,
28334 IX86_BUILTIN_UNPCKHPS512,
28335 IX86_BUILTIN_UNPCKLPD512,
28336 IX86_BUILTIN_UNPCKLPS512,
28337 IX86_BUILTIN_VCVTSD2SI32,
28338 IX86_BUILTIN_VCVTSD2SI64,
28339 IX86_BUILTIN_VCVTSD2USI32,
28340 IX86_BUILTIN_VCVTSD2USI64,
28341 IX86_BUILTIN_VCVTSS2SI32,
28342 IX86_BUILTIN_VCVTSS2SI64,
28343 IX86_BUILTIN_VCVTSS2USI32,
28344 IX86_BUILTIN_VCVTSS2USI64,
28345 IX86_BUILTIN_VCVTTSD2SI32,
28346 IX86_BUILTIN_VCVTTSD2SI64,
28347 IX86_BUILTIN_VCVTTSD2USI32,
28348 IX86_BUILTIN_VCVTTSD2USI64,
28349 IX86_BUILTIN_VCVTTSS2SI32,
28350 IX86_BUILTIN_VCVTTSS2SI64,
28351 IX86_BUILTIN_VCVTTSS2USI32,
28352 IX86_BUILTIN_VCVTTSS2USI64,
28353 IX86_BUILTIN_VFMADDPD512_MASK,
28354 IX86_BUILTIN_VFMADDPD512_MASK3,
28355 IX86_BUILTIN_VFMADDPD512_MASKZ,
28356 IX86_BUILTIN_VFMADDPS512_MASK,
28357 IX86_BUILTIN_VFMADDPS512_MASK3,
28358 IX86_BUILTIN_VFMADDPS512_MASKZ,
28359 IX86_BUILTIN_VFMADDSD3_ROUND,
28360 IX86_BUILTIN_VFMADDSS3_ROUND,
28361 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28362 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28363 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28364 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28365 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28366 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28367 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28368 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28369 IX86_BUILTIN_VFMSUBPD512_MASK3,
28370 IX86_BUILTIN_VFMSUBPS512_MASK3,
28371 IX86_BUILTIN_VFMSUBSD3_MASK3,
28372 IX86_BUILTIN_VFMSUBSS3_MASK3,
28373 IX86_BUILTIN_VFNMADDPD512_MASK,
28374 IX86_BUILTIN_VFNMADDPS512_MASK,
28375 IX86_BUILTIN_VFNMSUBPD512_MASK,
28376 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28377 IX86_BUILTIN_VFNMSUBPS512_MASK,
28378 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28379 IX86_BUILTIN_VPCLZCNTD512,
28380 IX86_BUILTIN_VPCLZCNTQ512,
28381 IX86_BUILTIN_VPCONFLICTD512,
28382 IX86_BUILTIN_VPCONFLICTQ512,
28383 IX86_BUILTIN_VPERMDF512,
28384 IX86_BUILTIN_VPERMDI512,
28385 IX86_BUILTIN_VPERMI2VARD512,
28386 IX86_BUILTIN_VPERMI2VARPD512,
28387 IX86_BUILTIN_VPERMI2VARPS512,
28388 IX86_BUILTIN_VPERMI2VARQ512,
28389 IX86_BUILTIN_VPERMILPD512,
28390 IX86_BUILTIN_VPERMILPS512,
28391 IX86_BUILTIN_VPERMILVARPD512,
28392 IX86_BUILTIN_VPERMILVARPS512,
28393 IX86_BUILTIN_VPERMT2VARD512,
28394 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28395 IX86_BUILTIN_VPERMT2VARPD512,
28396 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28397 IX86_BUILTIN_VPERMT2VARPS512,
28398 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28399 IX86_BUILTIN_VPERMT2VARQ512,
28400 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28401 IX86_BUILTIN_VPERMVARDF512,
28402 IX86_BUILTIN_VPERMVARDI512,
28403 IX86_BUILTIN_VPERMVARSF512,
28404 IX86_BUILTIN_VPERMVARSI512,
28405 IX86_BUILTIN_VTERNLOGD512_MASK,
28406 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28407 IX86_BUILTIN_VTERNLOGQ512_MASK,
28408 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28410 /* Mask arithmetic operations */
28411 IX86_BUILTIN_KAND16,
28412 IX86_BUILTIN_KANDN16,
28413 IX86_BUILTIN_KNOT16,
28414 IX86_BUILTIN_KOR16,
28415 IX86_BUILTIN_KORTESTC16,
28416 IX86_BUILTIN_KORTESTZ16,
28417 IX86_BUILTIN_KUNPCKBW,
28418 IX86_BUILTIN_KXNOR16,
28419 IX86_BUILTIN_KXOR16,
28420 IX86_BUILTIN_KMOV16,
28422 /* Alternate 4 and 8 element gather/scatter for the vectorizer
28423 where all operands are 32-byte or 64-byte wide respectively. */
28424 IX86_BUILTIN_GATHERALTSIV4DF,
28425 IX86_BUILTIN_GATHERALTDIV8SF,
28426 IX86_BUILTIN_GATHERALTSIV4DI,
28427 IX86_BUILTIN_GATHERALTDIV8SI,
28428 IX86_BUILTIN_GATHER3ALTDIV16SF,
28429 IX86_BUILTIN_GATHER3ALTDIV16SI,
28430 IX86_BUILTIN_GATHER3ALTSIV8DF,
28431 IX86_BUILTIN_GATHER3ALTSIV8DI,
28432 IX86_BUILTIN_GATHER3DIV16SF,
28433 IX86_BUILTIN_GATHER3DIV16SI,
28434 IX86_BUILTIN_GATHER3DIV8DF,
28435 IX86_BUILTIN_GATHER3DIV8DI,
28436 IX86_BUILTIN_GATHER3SIV16SF,
28437 IX86_BUILTIN_GATHER3SIV16SI,
28438 IX86_BUILTIN_GATHER3SIV8DF,
28439 IX86_BUILTIN_GATHER3SIV8DI,
28440 IX86_BUILTIN_SCATTERDIV16SF,
28441 IX86_BUILTIN_SCATTERDIV16SI,
28442 IX86_BUILTIN_SCATTERDIV8DF,
28443 IX86_BUILTIN_SCATTERDIV8DI,
28444 IX86_BUILTIN_SCATTERSIV16SF,
28445 IX86_BUILTIN_SCATTERSIV16SI,
28446 IX86_BUILTIN_SCATTERSIV8DF,
28447 IX86_BUILTIN_SCATTERSIV8DI,
28449 /* AVX512PF */
28450 IX86_BUILTIN_GATHERPFQPD,
28451 IX86_BUILTIN_GATHERPFDPS,
28452 IX86_BUILTIN_GATHERPFDPD,
28453 IX86_BUILTIN_GATHERPFQPS,
28454 IX86_BUILTIN_SCATTERPFDPD,
28455 IX86_BUILTIN_SCATTERPFDPS,
28456 IX86_BUILTIN_SCATTERPFQPD,
28457 IX86_BUILTIN_SCATTERPFQPS,
28459 /* AVX-512ER */
28460 IX86_BUILTIN_EXP2PD_MASK,
28461 IX86_BUILTIN_EXP2PS_MASK,
28462 IX86_BUILTIN_EXP2PS,
28463 IX86_BUILTIN_RCP28PD,
28464 IX86_BUILTIN_RCP28PS,
28465 IX86_BUILTIN_RCP28SD,
28466 IX86_BUILTIN_RCP28SS,
28467 IX86_BUILTIN_RSQRT28PD,
28468 IX86_BUILTIN_RSQRT28PS,
28469 IX86_BUILTIN_RSQRT28SD,
28470 IX86_BUILTIN_RSQRT28SS,
28472 /* SHA builtins. */
28473 IX86_BUILTIN_SHA1MSG1,
28474 IX86_BUILTIN_SHA1MSG2,
28475 IX86_BUILTIN_SHA1NEXTE,
28476 IX86_BUILTIN_SHA1RNDS4,
28477 IX86_BUILTIN_SHA256MSG1,
28478 IX86_BUILTIN_SHA256MSG2,
28479 IX86_BUILTIN_SHA256RNDS2,
28481 /* TFmode support builtins. */
28482 IX86_BUILTIN_INFQ,
28483 IX86_BUILTIN_HUGE_VALQ,
28484 IX86_BUILTIN_FABSQ,
28485 IX86_BUILTIN_COPYSIGNQ,
28487 /* Vectorizer support builtins. */
28488 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28489 IX86_BUILTIN_CPYSGNPS,
28490 IX86_BUILTIN_CPYSGNPD,
28491 IX86_BUILTIN_CPYSGNPS256,
28492 IX86_BUILTIN_CPYSGNPS512,
28493 IX86_BUILTIN_CPYSGNPD256,
28494 IX86_BUILTIN_CPYSGNPD512,
28495 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28496 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28499 /* FMA4 instructions. */
28500 IX86_BUILTIN_VFMADDSS,
28501 IX86_BUILTIN_VFMADDSD,
28502 IX86_BUILTIN_VFMADDPS,
28503 IX86_BUILTIN_VFMADDPD,
28504 IX86_BUILTIN_VFMADDPS256,
28505 IX86_BUILTIN_VFMADDPD256,
28506 IX86_BUILTIN_VFMADDSUBPS,
28507 IX86_BUILTIN_VFMADDSUBPD,
28508 IX86_BUILTIN_VFMADDSUBPS256,
28509 IX86_BUILTIN_VFMADDSUBPD256,
28511 /* FMA3 instructions. */
28512 IX86_BUILTIN_VFMADDSS3,
28513 IX86_BUILTIN_VFMADDSD3,
28515 /* XOP instructions. */
28516 IX86_BUILTIN_VPCMOV,
28517 IX86_BUILTIN_VPCMOV_V2DI,
28518 IX86_BUILTIN_VPCMOV_V4SI,
28519 IX86_BUILTIN_VPCMOV_V8HI,
28520 IX86_BUILTIN_VPCMOV_V16QI,
28521 IX86_BUILTIN_VPCMOV_V4SF,
28522 IX86_BUILTIN_VPCMOV_V2DF,
28523 IX86_BUILTIN_VPCMOV256,
28524 IX86_BUILTIN_VPCMOV_V4DI256,
28525 IX86_BUILTIN_VPCMOV_V8SI256,
28526 IX86_BUILTIN_VPCMOV_V16HI256,
28527 IX86_BUILTIN_VPCMOV_V32QI256,
28528 IX86_BUILTIN_VPCMOV_V8SF256,
28529 IX86_BUILTIN_VPCMOV_V4DF256,
28531 IX86_BUILTIN_VPPERM,
28533 IX86_BUILTIN_VPMACSSWW,
28534 IX86_BUILTIN_VPMACSWW,
28535 IX86_BUILTIN_VPMACSSWD,
28536 IX86_BUILTIN_VPMACSWD,
28537 IX86_BUILTIN_VPMACSSDD,
28538 IX86_BUILTIN_VPMACSDD,
28539 IX86_BUILTIN_VPMACSSDQL,
28540 IX86_BUILTIN_VPMACSSDQH,
28541 IX86_BUILTIN_VPMACSDQL,
28542 IX86_BUILTIN_VPMACSDQH,
28543 IX86_BUILTIN_VPMADCSSWD,
28544 IX86_BUILTIN_VPMADCSWD,
28546 IX86_BUILTIN_VPHADDBW,
28547 IX86_BUILTIN_VPHADDBD,
28548 IX86_BUILTIN_VPHADDBQ,
28549 IX86_BUILTIN_VPHADDWD,
28550 IX86_BUILTIN_VPHADDWQ,
28551 IX86_BUILTIN_VPHADDDQ,
28552 IX86_BUILTIN_VPHADDUBW,
28553 IX86_BUILTIN_VPHADDUBD,
28554 IX86_BUILTIN_VPHADDUBQ,
28555 IX86_BUILTIN_VPHADDUWD,
28556 IX86_BUILTIN_VPHADDUWQ,
28557 IX86_BUILTIN_VPHADDUDQ,
28558 IX86_BUILTIN_VPHSUBBW,
28559 IX86_BUILTIN_VPHSUBWD,
28560 IX86_BUILTIN_VPHSUBDQ,
28562 IX86_BUILTIN_VPROTB,
28563 IX86_BUILTIN_VPROTW,
28564 IX86_BUILTIN_VPROTD,
28565 IX86_BUILTIN_VPROTQ,
28566 IX86_BUILTIN_VPROTB_IMM,
28567 IX86_BUILTIN_VPROTW_IMM,
28568 IX86_BUILTIN_VPROTD_IMM,
28569 IX86_BUILTIN_VPROTQ_IMM,
28571 IX86_BUILTIN_VPSHLB,
28572 IX86_BUILTIN_VPSHLW,
28573 IX86_BUILTIN_VPSHLD,
28574 IX86_BUILTIN_VPSHLQ,
28575 IX86_BUILTIN_VPSHAB,
28576 IX86_BUILTIN_VPSHAW,
28577 IX86_BUILTIN_VPSHAD,
28578 IX86_BUILTIN_VPSHAQ,
28580 IX86_BUILTIN_VFRCZSS,
28581 IX86_BUILTIN_VFRCZSD,
28582 IX86_BUILTIN_VFRCZPS,
28583 IX86_BUILTIN_VFRCZPD,
28584 IX86_BUILTIN_VFRCZPS256,
28585 IX86_BUILTIN_VFRCZPD256,
28587 IX86_BUILTIN_VPCOMEQUB,
28588 IX86_BUILTIN_VPCOMNEUB,
28589 IX86_BUILTIN_VPCOMLTUB,
28590 IX86_BUILTIN_VPCOMLEUB,
28591 IX86_BUILTIN_VPCOMGTUB,
28592 IX86_BUILTIN_VPCOMGEUB,
28593 IX86_BUILTIN_VPCOMFALSEUB,
28594 IX86_BUILTIN_VPCOMTRUEUB,
28596 IX86_BUILTIN_VPCOMEQUW,
28597 IX86_BUILTIN_VPCOMNEUW,
28598 IX86_BUILTIN_VPCOMLTUW,
28599 IX86_BUILTIN_VPCOMLEUW,
28600 IX86_BUILTIN_VPCOMGTUW,
28601 IX86_BUILTIN_VPCOMGEUW,
28602 IX86_BUILTIN_VPCOMFALSEUW,
28603 IX86_BUILTIN_VPCOMTRUEUW,
28605 IX86_BUILTIN_VPCOMEQUD,
28606 IX86_BUILTIN_VPCOMNEUD,
28607 IX86_BUILTIN_VPCOMLTUD,
28608 IX86_BUILTIN_VPCOMLEUD,
28609 IX86_BUILTIN_VPCOMGTUD,
28610 IX86_BUILTIN_VPCOMGEUD,
28611 IX86_BUILTIN_VPCOMFALSEUD,
28612 IX86_BUILTIN_VPCOMTRUEUD,
28614 IX86_BUILTIN_VPCOMEQUQ,
28615 IX86_BUILTIN_VPCOMNEUQ,
28616 IX86_BUILTIN_VPCOMLTUQ,
28617 IX86_BUILTIN_VPCOMLEUQ,
28618 IX86_BUILTIN_VPCOMGTUQ,
28619 IX86_BUILTIN_VPCOMGEUQ,
28620 IX86_BUILTIN_VPCOMFALSEUQ,
28621 IX86_BUILTIN_VPCOMTRUEUQ,
28623 IX86_BUILTIN_VPCOMEQB,
28624 IX86_BUILTIN_VPCOMNEB,
28625 IX86_BUILTIN_VPCOMLTB,
28626 IX86_BUILTIN_VPCOMLEB,
28627 IX86_BUILTIN_VPCOMGTB,
28628 IX86_BUILTIN_VPCOMGEB,
28629 IX86_BUILTIN_VPCOMFALSEB,
28630 IX86_BUILTIN_VPCOMTRUEB,
28632 IX86_BUILTIN_VPCOMEQW,
28633 IX86_BUILTIN_VPCOMNEW,
28634 IX86_BUILTIN_VPCOMLTW,
28635 IX86_BUILTIN_VPCOMLEW,
28636 IX86_BUILTIN_VPCOMGTW,
28637 IX86_BUILTIN_VPCOMGEW,
28638 IX86_BUILTIN_VPCOMFALSEW,
28639 IX86_BUILTIN_VPCOMTRUEW,
28641 IX86_BUILTIN_VPCOMEQD,
28642 IX86_BUILTIN_VPCOMNED,
28643 IX86_BUILTIN_VPCOMLTD,
28644 IX86_BUILTIN_VPCOMLED,
28645 IX86_BUILTIN_VPCOMGTD,
28646 IX86_BUILTIN_VPCOMGED,
28647 IX86_BUILTIN_VPCOMFALSED,
28648 IX86_BUILTIN_VPCOMTRUED,
28650 IX86_BUILTIN_VPCOMEQQ,
28651 IX86_BUILTIN_VPCOMNEQ,
28652 IX86_BUILTIN_VPCOMLTQ,
28653 IX86_BUILTIN_VPCOMLEQ,
28654 IX86_BUILTIN_VPCOMGTQ,
28655 IX86_BUILTIN_VPCOMGEQ,
28656 IX86_BUILTIN_VPCOMFALSEQ,
28657 IX86_BUILTIN_VPCOMTRUEQ,
28659 /* LWP instructions. */
28660 IX86_BUILTIN_LLWPCB,
28661 IX86_BUILTIN_SLWPCB,
28662 IX86_BUILTIN_LWPVAL32,
28663 IX86_BUILTIN_LWPVAL64,
28664 IX86_BUILTIN_LWPINS32,
28665 IX86_BUILTIN_LWPINS64,
28667 IX86_BUILTIN_CLZS,
28669 /* RTM */
28670 IX86_BUILTIN_XBEGIN,
28671 IX86_BUILTIN_XEND,
28672 IX86_BUILTIN_XABORT,
28673 IX86_BUILTIN_XTEST,
28675 /* BMI instructions. */
28676 IX86_BUILTIN_BEXTR32,
28677 IX86_BUILTIN_BEXTR64,
28678 IX86_BUILTIN_CTZS,
28680 /* TBM instructions. */
28681 IX86_BUILTIN_BEXTRI32,
28682 IX86_BUILTIN_BEXTRI64,
28684 /* BMI2 instructions. */
28685 IX86_BUILTIN_BZHI32,
28686 IX86_BUILTIN_BZHI64,
28687 IX86_BUILTIN_PDEP32,
28688 IX86_BUILTIN_PDEP64,
28689 IX86_BUILTIN_PEXT32,
28690 IX86_BUILTIN_PEXT64,
28692 /* ADX instructions. */
28693 IX86_BUILTIN_ADDCARRYX32,
28694 IX86_BUILTIN_ADDCARRYX64,
28696 /* FSGSBASE instructions. */
28697 IX86_BUILTIN_RDFSBASE32,
28698 IX86_BUILTIN_RDFSBASE64,
28699 IX86_BUILTIN_RDGSBASE32,
28700 IX86_BUILTIN_RDGSBASE64,
28701 IX86_BUILTIN_WRFSBASE32,
28702 IX86_BUILTIN_WRFSBASE64,
28703 IX86_BUILTIN_WRGSBASE32,
28704 IX86_BUILTIN_WRGSBASE64,
28706 /* RDRND instructions. */
28707 IX86_BUILTIN_RDRAND16_STEP,
28708 IX86_BUILTIN_RDRAND32_STEP,
28709 IX86_BUILTIN_RDRAND64_STEP,
28711 /* RDSEED instructions. */
28712 IX86_BUILTIN_RDSEED16_STEP,
28713 IX86_BUILTIN_RDSEED32_STEP,
28714 IX86_BUILTIN_RDSEED64_STEP,
28716 /* F16C instructions. */
28717 IX86_BUILTIN_CVTPH2PS,
28718 IX86_BUILTIN_CVTPH2PS256,
28719 IX86_BUILTIN_CVTPS2PH,
28720 IX86_BUILTIN_CVTPS2PH256,
28722 /* CFString built-in for darwin */
28723 IX86_BUILTIN_CFSTRING,
28725 /* Builtins to get CPU type and supported features; see the usage sketch after this enum. */
28726 IX86_BUILTIN_CPU_INIT,
28727 IX86_BUILTIN_CPU_IS,
28728 IX86_BUILTIN_CPU_SUPPORTS,
28730 /* Read/write FLAGS register built-ins. */
28731 IX86_BUILTIN_READ_FLAGS,
28732 IX86_BUILTIN_WRITE_FLAGS,
28734 IX86_BUILTIN_MAX
28735 };
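/* Usage sketch for the CPU-dispatch builtins enumerated above
   (IX86_BUILTIN_CPU_INIT / _CPU_IS / _CPU_SUPPORTS); these back the
   documented __builtin_cpu_* entry points:

     __builtin_cpu_init ();
     if (__builtin_cpu_supports ("avx2"))
       run_avx2_version ();
     else if (__builtin_cpu_is ("intel"))
       run_intel_tuned_version ();

   run_avx2_version and run_intel_tuned_version stand for user code.  */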
28737 /* Table for the ix86 builtin decls. */
28738 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28740 /* Table of all of the builtin functions that are possible with different ISAs
28741 but are waiting to be built until a function is declared to use that
28742 ISA. */
28743 struct builtin_isa {
28744 const char *name; /* function name */
28745 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28746 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28747 bool const_p; /* true if the declaration is constant */
28748 bool set_and_not_built_p;
28749 };
28751 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28754 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
28755 of which isa_flags to use in the ix86_builtins_isa array. Stores the
28756 function decl in the ix86_builtins array. Returns the function decl or
28757 NULL_TREE, if the builtin was not added.
28759 If the front end has a special hook for builtin functions, delay adding
28760 builtin functions that aren't in the current ISA until the ISA is changed
28761 with function specific optimization. Doing so can save about 300K for the
28762 default compiler. When the builtin is expanded, check at that time whether
28763 it is valid.
28765 If the front end doesn't have a special hook, record all builtins, even
28766 those that aren't in the current ISA, in case the user uses function
28767 specific options for a different ISA, so that we don't get scope errors
28768 if a builtin is added in the middle of a function scope. */
28770 static inline tree
28771 def_builtin (HOST_WIDE_INT mask, const char *name,
28772 enum ix86_builtin_func_type tcode,
28773 enum ix86_builtins code)
28774 {
28775 tree decl = NULL_TREE;
28777 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28778 {
28779 ix86_builtins_isa[(int) code].isa = mask;
28781 mask &= ~OPTION_MASK_ISA_64BIT;
28782 if (mask == 0
28783 || (mask & ix86_isa_flags) != 0
28784 || (lang_hooks.builtin_function
28785 == lang_hooks.builtin_function_ext_scope))
28787 {
28788 tree type = ix86_get_builtin_func_type (tcode);
28789 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28790 NULL, NULL_TREE);
28791 ix86_builtins[(int) code] = decl;
28792 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28793 }
28794 else
28795 {
28796 ix86_builtins[(int) code] = NULL_TREE;
28797 ix86_builtins_isa[(int) code].tcode = tcode;
28798 ix86_builtins_isa[(int) code].name = name;
28799 ix86_builtins_isa[(int) code].const_p = false;
28800 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28801 }
28802 }
28804 return decl;
28805 }
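/* A minimal usage sketch for def_builtin, assuming a hypothetical
   enumerator IX86_BUILTIN_EXAMPLE and builtin name
   "__builtin_ia32_example" (neither exists in this file):

     decl = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                         INT_FTYPE_VOID, IX86_BUILTIN_EXAMPLE);

   When SSE2 is part of ix86_isa_flags (or the front end registers
   extern-scope builtins), DECL is the freshly added builtin decl;
   otherwise the request is only recorded in ix86_builtins_isa and the
   decl is created later by ix86_add_new_builtins.  */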
28807 /* Like def_builtin, but also marks the function decl "const". */
28809 static inline tree
28810 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28811 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28812 {
28813 tree decl = def_builtin (mask, name, tcode, code);
28814 if (decl)
28815 TREE_READONLY (decl) = 1;
28816 else
28817 ix86_builtins_isa[(int) code].const_p = true;
28819 return decl;
28820 }
28822 /* Add any new builtin functions for a given ISA that may not have been
28823 declared. This saves a bit of space compared to adding all of the
28824 declarations to the tree, even if we didn't use them. */
28826 static void
28827 ix86_add_new_builtins (HOST_WIDE_INT isa)
28828 {
28829 int i;
28831 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28832 {
28833 if ((ix86_builtins_isa[i].isa & isa) != 0
28834 && ix86_builtins_isa[i].set_and_not_built_p)
28835 {
28836 tree decl, type;
28838 /* Don't define the builtin again. */
28839 ix86_builtins_isa[i].set_and_not_built_p = false;
28841 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28842 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28843 type, i, BUILT_IN_MD, NULL,
28844 NULL_TREE);
28846 ix86_builtins[i] = decl;
28847 if (ix86_builtins_isa[i].const_p)
28848 TREE_READONLY (decl) = 1;
28849 }
28850 }
28851 }
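/* Illustrative effect of the lazy declaration above, in user code: a
   builtin that was deferred because its ISA was off at startup becomes
   available once a function enables that ISA, e.g. using the AVX2
   mask-load builtin registered below (the vector typedef is only for
   this sketch):

     typedef int v8si __attribute__ ((vector_size (32)));

     __attribute__ ((target ("avx2")))
     v8si load_masked (const v8si *p, v8si mask)
     {
       return __builtin_ia32_maskloadd256 (p, mask);
     }
*/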
28853 /* Bits for builtin_description.flag. */
28855 /* Set when we don't support the comparison natively, and should
28856 swap_comparison in order to support it. */
28857 #define BUILTIN_DESC_SWAP_OPERANDS 1
28859 struct builtin_description
28860 {
28861 const HOST_WIDE_INT mask;
28862 const enum insn_code icode;
28863 const char *const name;
28864 const enum ix86_builtins code;
28865 const enum rtx_code comparison;
28866 const int flag;
28867 };
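/* The bdesc_* tables below are plain data: the initialization code later
   in this file hands each row's ISA mask, name, function-type code and
   IX86_BUILTIN_* code to def_builtin/def_builtin_const, while the icode,
   comparison and flag fields drive expansion afterwards.  A sketch of
   what one comi row provides at the source level, where A and B are
   4-float (__m128-style) vectors and the intrinsic headers supply the
   usual wrappers:

     int eq = __builtin_ia32_comieq (A, B);
*/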
28869 static const struct builtin_description bdesc_comi[] =
28870 {
28871 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28872 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28873 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28875 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28876 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28877 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28878 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28879 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28880 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28881 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28882 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28883 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28884 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28885 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28886 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28887 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28888 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28889 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28890 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28891 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28892 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28893 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28894 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28895 };
28897 static const struct builtin_description bdesc_pcmpestr[] =
28898 {
28899 /* SSE4.2 */
28900 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28901 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28902 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28903 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28904 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28905 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28906 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28907 };
28909 static const struct builtin_description bdesc_pcmpistr[] =
28910 {
28911 /* SSE4.2 */
28912 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28913 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28914 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28915 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28916 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28917 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28918 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28919 };
28921 /* Special builtins with variable number of arguments. */
28922 static const struct builtin_description bdesc_special_args[] =
28923 {
28924 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28925 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28926 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28928 /* 80387 (for use internally for atomic compound assignment). */
28929 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28930 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28931 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
28932 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28934 /* MMX */
28935 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28937 /* 3DNow! */
28938 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28940 /* FXSR, XSAVE and XSAVEOPT */
28941 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
28942 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
28943 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28944 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28945 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28947 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28948 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28949 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28950 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28951 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28953 /* SSE */
28954 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28955 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28956 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
28958 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28959 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28960 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28961 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28963 /* SSE or 3DNow!A */
28964 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28965 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
28967 /* SSE2 */
28968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28971 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
28972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
28974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
28975 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
28976 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
28977 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
28979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
28980 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
28982 /* SSE3 */
28983 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
28985 /* SSE4.1 */
28986 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
28988 /* SSE4A */
28989 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28990 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28992 /* AVX */
28993 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
28994 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
28996 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
28997 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
28998 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
28999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29007 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29010 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29011 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29012 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29014 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29015 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29016 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29017 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29018 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29019 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29020 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29021 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29023 /* AVX2 */
29024 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29025 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29026 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29027 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29028 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29029 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29030 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29031 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29032 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29034 /* AVX512F */
29035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29083 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29084 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29085 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29086 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29087 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29088 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29090 /* FSGSBASE */
29091 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29092 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29093 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29094 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29095 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29096 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29097 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29098 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29100 /* RTM */
29101 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29102 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29103 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29104 };
29106 /* Builtins with variable number of arguments. */
29107 static const struct builtin_description bdesc_args[] =
29108 {
29109 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29110 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29111 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29112 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29113 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29114 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29115 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29117 /* MMX */
29118 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29119 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29120 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29122 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29123 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29126 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29127 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29128 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29129 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29130 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29131 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29132 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29134 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29139 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
29154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
29156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29158 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29162 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29163 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29165 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29170 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29171 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29172 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29176 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29177 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29178 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29179 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29181 /* 3DNow! */
29182 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29183 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29184 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29185 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29187 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29188 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29189 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29190 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29191 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29192 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29193 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29194 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29195 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29196 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29197 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29198 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29199 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29200 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29201 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29203 /* 3DNow!A */
29204 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29205 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29206 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29207 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29208 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29209 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29211 /* SSE */
29212 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29213 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29214 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29215 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29216 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29217 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29218 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29219 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29220 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29221 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29222 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29223 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29225 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29227 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29228 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29229 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29230 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29231 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29232 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29233 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29236 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29237 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29238 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29240 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29241 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29243 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29244 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29245 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29246 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29248 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29250 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29251 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29252 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29253 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29254 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
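/* Editorial note, not part of the original table: the rtx_code column and
   the _SWAP suffix together encode how the single maskcmp pattern is
   reused.  There is no "greater than" pattern, so the CMPGTPS row maps to
   LT with V4SF_FTYPE_V4SF_V4SF_SWAP, i.e. roughly

     __builtin_ia32_cmpgtps (a, b)  ->  cmpltps applied to (b, a)

   and the negated forms use the unordered-inverse codes (UNGE for "not
   less than", UNGT for "not less or equal") so that NaN operands still
   yield an all-ones mask, matching the Intel CMPPS predicates.  */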
29257 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29258 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29259 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29260 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29262 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29263 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29264 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29265 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29267 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29269 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29270 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29271 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29272 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29273 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29275 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29276 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29277 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29279 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29281 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29283 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29285 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29286 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29288 /* SSE MMX or 3Dnow!A */
29289 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29290 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29291 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29293 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29294 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29295 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29296 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29298 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29299 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29301 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29303 /* SSE2 */
29304 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29308 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29309 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29310 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29313 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29322 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29323 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29325 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29327 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29329 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29330 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29331 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29332 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29333 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29343 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29344 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29346 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29360 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29364 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29365 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29366 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29367 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29369 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29372 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29373 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29375 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29377 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29378 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29379 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29380 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29381 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29382 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29383 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29384 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29395 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29396 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29398 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29400 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29401 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29424 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29440 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29460 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29462 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29473 /* SSE2 MMX */
29474 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29475 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29477 /* SSE3 */
29478 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29479 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29481 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29482 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29483 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29484 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29485 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29486 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29488 /* SSSE3 */
29489 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29490 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29491 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29492 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29493 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29494 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29496 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29497 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29498 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29499 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29500 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29501 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29502 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29503 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29504 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29505 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29506 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29507 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29508 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29509 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29510 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29511 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29512 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29516 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29517 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29518 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29519 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29521 /* SSSE3. */
29522 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29523 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29525 /* SSE4.1 */
29526 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29527 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29528 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29529 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29530 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29531 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29532 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29533 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29534 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29535 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29537 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29538 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29539 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29540 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29541 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29542 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29543 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29544 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29545 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29546 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29547 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29548 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29549 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29551 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29552 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29553 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29555 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29556 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29557 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29558 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29559 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29560 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29561 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29562 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29564 /* SSE4.1 */
29565 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29566 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29567 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29568 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29570 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29571 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29572 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29573 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29575 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29576 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29578 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29579 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29581 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29582 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29583 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29584 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29586 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29587 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29589 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29590 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29592 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29593 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29594 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29596 /* SSE4.2 */
29597 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29598 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29599 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29600 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29601 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29603 /* SSE4A */
29604 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29605 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29606 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29607 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29609 /* AES */
29610 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29611 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29613 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29614 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29615 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29616 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29618 /* PCLMUL */
29619 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
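/* Editorial note, not part of the original table: the AES and PCLMUL rows
   above (and the FABSQ/COPYSIGNQ rows earlier) have a 0 in the name slot.
   As far as this table is concerned, that appears to mean the builtin is
   declared elsewhere under its own ISA guard (e.g. OPTION_MASK_ISA_AES);
   the row here only supplies the insn code and prototype consulted when
   the already-declared builtin is expanded.  */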
29621 /* AVX */
29622 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29623 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29626 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29627 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29630 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29632 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29633 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29634 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29635 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29636 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29637 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29638 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29639 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29640 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29641 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29642 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29643 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29644 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29645 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29646 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29647 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29649 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29650 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29656 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29667 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29668 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29670 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29671 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29672 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29673 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29675 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29677 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29681 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29686 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29693 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29695 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29710 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29723 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
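/* The next six entries back the 128<->256-bit casts: si256_si, ps256_ps
   and pd256_pd place a 128-bit value in the low half of a 256-bit vector
   (the cast intrinsics leave the upper half unspecified), while the
   vec_extract_lo patterns implement the narrowing direction.  A hedged
   mapping to the avxintrin.h wrappers, which live in the headers rather
   than in this file:

     __m256i w = _mm256_castsi128_si256 (v);   uses __builtin_ia32_si256_si
     __m128i n = _mm256_castsi256_si128 (w);   uses __builtin_ia32_si_si256  */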
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
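/* For the vtest/ptest entries below, the rtx_code field selects which
   condition of the flags result the builtin returns: EQ yields the ZF
   test (testz), LTU the CF test (testc), and GTU the "neither ZF nor CF"
   test (testnzc).  The ptest expander (ix86_expand_sse_ptest) emits the
   flag comparison from this code.  */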
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29760 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29762 /* AVX2 */
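/* Field layout reminder for these builtin_description entries: ISA option
   mask, insn code, builtin name, IX86_BUILTIN_* enumerator, rtx_code (or
   a reused constant) and the prototype enumerator cast to int.  A hedged
   example of how an entry is reached from user code through the
   avx2intrin.h wrapper (the wrapper itself is in the headers, not here):

     __m256i c = _mm256_add_epi32 (a, b);
         calls __builtin_ia32_paddd256, i.e. the IX86_BUILTIN_PADDD256
         entry below, which expands through the addv8si3 pattern.  */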
29763 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29764 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29765 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29766 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29767 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29768 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29769 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29770 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29771 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29772 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29773 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29774 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29775 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29776 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29777 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29780 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29782 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29783 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29788 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29793 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29794 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29795 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29796 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29797 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29798 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29799 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29800 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29801 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29802 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29803 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29804 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29805 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29806 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29807 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29808 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29809 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29810 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29811 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29812 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29813 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29814 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29815 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29816 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29817 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29818 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29819 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29820 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29821 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29822 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29823 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29824 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29825 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29826 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29827 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29828 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29829 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29830 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29831 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29832 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29833 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29834 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29835 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29836 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
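/* Shift entries: the _COUNT prototypes say how the shift count arrives.
   The ..._SI_COUNT / ..._INT_COUNT forms (psllwi, pslldi, psllqi, ...)
   take a scalar count, while the ..._V8HI_COUNT / ..._V4SI_COUNT /
   ..._V2DI_COUNT forms (psllw, pslld, psllq, ...) take it in the low
   element of an XMM operand; both map onto the same ashl/ashr/lshr
   patterns.  The _INT_CONVERT prototypes used by pslldqi256 and
   psrldqi256 below (and palignr256 above) mean the V4DI operands are
   reinterpreted in the insn's mode, V2TI here, before expansion.  */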
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29910 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29912 /* BMI */
29913 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29914 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29915 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29917 /* TBM */
29918 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29919 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29921 /* F16C */
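/* Half-precision conversions.  A hedged mapping to the f16cintrin.h
   wrappers (the wrappers live in the headers, not in this file):
   _mm_cvtph_ps uses __builtin_ia32_vcvtph2ps, and _mm256_cvtps_ph uses
   __builtin_ia32_vcvtps2ph256 with the rounding immediate as its second
   argument.  */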
29922 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29923 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29924 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29925 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
29927 /* BMI2 */
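/* BMI2 scalar bit manipulation.  A hedged example of the mapping from the
   bmi2intrin.h wrappers (the wrappers are in the headers, not here):

     unsigned int d = _pdep_u32 (src, mask);   uses __builtin_ia32_pdep_si
     unsigned int e = _pext_u32 (src, mask);   uses __builtin_ia32_pext_si

   The _si/_di suffixes select the 32-bit and 64-bit forms.  */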
29928 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29929 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29930 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29931 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29932 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29933 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29935 /* AVX512F */
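/* AVX512F entries follow the masking convention of the intrinsics: a
   builtin whose name ends in _mask takes, after its normal operands, a
   pass-through vector supplying the elements whose mask bit is clear,
   followed by the mask itself; this shows up in the prototypes as a
   trailing vector plus HI (16-bit) or QI (8-bit) mask operand, e.g.
   V16SI_FTYPE_V16SI_V16SI_V16SI_HI.  The separate _maskz entries expand
   to the zero-masking form instead of merging.  A hedged illustration of
   how a masked entry is reached from avx512fintrin.h (the wrapper is in
   the headers, not here):

     r = _mm512_mask_add_epi32 (w, m, a, b);
         calls __builtin_ia32_paddd512_mask (a, b, w, m).  */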
29936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
29937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
29938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
29943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
29944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
29945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
29946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
29947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
29948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
29949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
29950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
29953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
29954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
29955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
29956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
29961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
29962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
29963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
29964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
29965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
29966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
29967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
29968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
29984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
29985 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
29986 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
29987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
29988 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
29989 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
29990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
29991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
29992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
29993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
29994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
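/* The pmov* entries are the AVX-512 narrowing moves: pmovdb/pmovdw and
   pmovqb/pmovqd/pmovqw truncate 32- and 64-bit elements to narrower
   ones, the pmovs* variants narrow with signed saturation and the
   pmovus* variants with unsigned saturation; under a mask the result is
   merged into the narrow vector passed as the next-to-last operand.  */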
30008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30097 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30098 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30099 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30100 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30132 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30137 /* Mask arithmetic operations */
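/* These entries operate on the 16-bit opmask registers rather than on vector
   operands: the kandhi/korhi/kxorhi patterns and the HI_FTYPE_HI* signatures
   below all work on HImode values, which is how a 16-bit __mmask16 mask is
   carried around here.  */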
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30149 /* SHA */
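/* Note the 0 in the name slot: entries without a name are presumably skipped
   when the table is walked to register user-visible builtins, so these rows
   only supply the insn codes and signatures used at expansion time, with the
   __builtin_ia32_sha* names declared elsewhere in this file.  */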
30150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30157 };
30159 /* Builtins with rounding support.  */
30160 static const struct builtin_description bdesc_round_args[] =
30161 {
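/* Each initializer below follows the builtin_description layout used by the
   other bdesc_* tables in this file: ISA option mask, insn code of the
   expander, user-visible builtin name, ix86_builtins enumerator, an optional
   rtx_code / sub-code, and the function-type index.  The trailing _INT in the
   *_FTYPE_* names is the extra rounding-mode immediate these variants take,
   and OPTION_MASK_ISA_64BIT additionally restricts an entry to 64-bit
   targets.  Intrinsic wrappers such as _mm512_add_round_pd in avx512fintrin.h
   are expected to reach these expanders through the __builtin_ia32_* names.  */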
30162 /* AVX512F */
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30182 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30184 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30191 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30193 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30243 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30245 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30247 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30249 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30251 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30253 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30255 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30257 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30283 /* AVX512ER */
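/* AVX-512ER provides the higher-precision (roughly 2^-28 relative error)
   exponential and reciprocal approximations; the exp2, rcp28 and rsqrt28
   entries below mirror the packed/scalar split used for the AVX512F
   operations above.  */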
30284 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30285 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30286 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30287 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30288 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30289 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30290 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30291 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30292 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30293 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30294 };
30296 /* FMA4 and XOP.  */
30297 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30298 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30299 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30300 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30301 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30302 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30303 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30304 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30305 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30306 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30307 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30308 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30309 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30310 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30311 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30312 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30313 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30314 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30315 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30316 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30317 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30318 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30319 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30320 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30321 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30322 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30323 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30324 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30325 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30326 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30327 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30328 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30329 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30330 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30331 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30332 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30333 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30334 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30335 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30336 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30337 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30338 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30339 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30340 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30341 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30342 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30343 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30344 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30345 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30346 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30347 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30348 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
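/* Decoding the MULTI_ARG_* names above: the digit is the operand count, the
   element type (SF/DF/SI/DI/HI/QI) follows, a trailing 2 marks the 256-bit
   variant, and the _IMM, _CMP and _TF suffixes flag an immediate shift/rotate
   count, a comparison code, or a pcom true/false sub-code, as can be read off
   the *_FTYPE_* expansions they stand for.  */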
30350 static const struct builtin_description bdesc_multi_arg[] =
30351 {
30352 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30353 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30354 UNKNOWN, (int)MULTI_ARG_3_SF },
30355 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30356 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30357 UNKNOWN, (int)MULTI_ARG_3_DF },
30359 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30360 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30361 UNKNOWN, (int)MULTI_ARG_3_SF },
30362 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30363 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30364 UNKNOWN, (int)MULTI_ARG_3_DF },
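/* The next group of entries ORs OPTION_MASK_ISA_FMA and OPTION_MASK_ISA_FMA4
   together.  Assuming the usual def_builtin handling, the mask is tested as
   "any of these bits enabled", so the shared vfmaddps/vfmaddpd forms become
   available under either -mfma or -mfma4.  */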
30366 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30367 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30368 UNKNOWN, (int)MULTI_ARG_3_SF },
30369 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30370 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30371 UNKNOWN, (int)MULTI_ARG_3_DF },
30372 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30373 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30374 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30375 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30376 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30377 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30379 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30380 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30381 UNKNOWN, (int)MULTI_ARG_3_SF },
30382 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30383 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30384 UNKNOWN, (int)MULTI_ARG_3_DF },
30385 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30386 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30387 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30388 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30389 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30390 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30392 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30393 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30394 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30395 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30396 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
30397 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30398 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30400 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30401 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30402 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30403 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30404 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30405 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30406 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30410 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30411 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30412 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30413 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30414 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30415 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30420 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30423 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30425 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30429 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30430 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30431 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30433 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30441 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30444 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30445 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
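/* For the vpcom* comparisons that follow, one maskcmp pattern per element
   width is reused and the rtx_code in the fifth field selects the condition;
   the "neq" spellings are simply aliases that map to the same IX86_BUILTIN_*
   code as the corresponding "ne" entry.  */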
30463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30552 /* TM vector builtins. */
30554 /* Reuse the existing x86-specific `struct builtin_description' because
30555 we're lazy. Add casts to make them fit. */
30556 static const struct builtin_description bdesc_tm[] =
30558 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30559 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30560 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30561 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30562 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30563 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30564 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30566 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30567 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30568 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30569 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30570 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30571 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30572 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30574 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30575 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30576 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30577 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30578 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30579 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30580 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30582 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30583 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30584 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30587 /* TM callbacks. */
30589 /* Return the builtin decl needed to load a vector of TYPE. */
30591 static tree
30592 ix86_builtin_tm_load (tree type)
30594 if (TREE_CODE (type) == VECTOR_TYPE)
30596 switch (tree_to_uhwi (TYPE_SIZE (type)))
30598 case 64:
30599 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30600 case 128:
30601 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30602 case 256:
30603 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30606 return NULL_TREE;
30609 /* Return the builtin decl needed to store a vector of TYPE. */
30611 static tree
30612 ix86_builtin_tm_store (tree type)
30614 if (TREE_CODE (type) == VECTOR_TYPE)
30616 switch (tree_to_uhwi (TYPE_SIZE (type)))
30618 case 64:
30619 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30620 case 128:
30621 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30622 case 256:
30623 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30626 return NULL_TREE;
30629 /* Initialize the transactional memory vector load/store builtins. */
30631 static void
30632 ix86_init_tm_builtins (void)
30634 enum ix86_builtin_func_type ftype;
30635 const struct builtin_description *d;
30636 size_t i;
30637 tree decl;
30638 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30639 tree attrs_log, attrs_type_log;
30641 if (!flag_tm)
30642 return;
30644 /* If there are no builtins defined, we must be compiling in a
30645 language without trans-mem support. */
30646 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30647 return;
30649 /* Use whatever attributes a normal TM load has. */
30650 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30651 attrs_load = DECL_ATTRIBUTES (decl);
30652 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30653 /* Use whatever attributes a normal TM store has. */
30654 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30655 attrs_store = DECL_ATTRIBUTES (decl);
30656 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30657 /* Use whatever attributes a normal TM log has. */
30658 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30659 attrs_log = DECL_ATTRIBUTES (decl);
30660 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30662 for (i = 0, d = bdesc_tm;
30663 i < ARRAY_SIZE (bdesc_tm);
30664 i++, d++)
30666 if ((d->mask & ix86_isa_flags) != 0
30667 || (lang_hooks.builtin_function
30668 == lang_hooks.builtin_function_ext_scope))
30670 tree type, attrs, attrs_type;
30671 enum built_in_function code = (enum built_in_function) d->code;
30673 ftype = (enum ix86_builtin_func_type) d->flag;
30674 type = ix86_get_builtin_func_type (ftype);
30676 if (BUILTIN_TM_LOAD_P (code))
30678 attrs = attrs_load;
30679 attrs_type = attrs_type_load;
30681 else if (BUILTIN_TM_STORE_P (code))
30683 attrs = attrs_store;
30684 attrs_type = attrs_type_store;
30686 else
30688 attrs = attrs_log;
30689 attrs_type = attrs_type_log;
30691 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30692 /* The name without the "__builtin_" prefix, for
30693 calling the builtin directly. */
30694 d->name + strlen ("__builtin_"),
30695 attrs);
30696 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30697 set the TYPE_ATTRIBUTES. */
30698 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30700 set_builtin_decl (code, decl, false);
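/* Illustrative sketch, not part of GCC itself: because the loop above also
   registers each entry under its name minus the "__builtin_" prefix, user
   or libitm code can call the vector TM accessors directly, e.g. (assuming
   SSE is enabled and -fgnu-tm is in effect):

     __m128 v = _ITM_RM128 (src_ptr);
     _ITM_WM128 (dst_ptr, v);

   The trans-mem lowering pass normally emits such calls itself for vector
   loads and stores inside __transaction_atomic blocks.  */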
30705 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30706 in the current target ISA, to allow the user to compile particular modules
30707 with target-specific options that differ from the command-line
30708 options. */
30709 static void
30710 ix86_init_mmx_sse_builtins (void)
30712 const struct builtin_description * d;
30713 enum ix86_builtin_func_type ftype;
30714 size_t i;
30716 /* Add all special builtins with a variable number of operands. */
30717 for (i = 0, d = bdesc_special_args;
30718 i < ARRAY_SIZE (bdesc_special_args);
30719 i++, d++)
30721 if (d->name == 0)
30722 continue;
30724 ftype = (enum ix86_builtin_func_type) d->flag;
30725 def_builtin (d->mask, d->name, ftype, d->code);
30728 /* Add all builtins with a variable number of operands. */
30729 for (i = 0, d = bdesc_args;
30730 i < ARRAY_SIZE (bdesc_args);
30731 i++, d++)
30733 if (d->name == 0)
30734 continue;
30736 ftype = (enum ix86_builtin_func_type) d->flag;
30737 def_builtin_const (d->mask, d->name, ftype, d->code);
30740 /* Add all builtins with rounding. */
30741 for (i = 0, d = bdesc_round_args;
30742 i < ARRAY_SIZE (bdesc_round_args);
30743 i++, d++)
30745 if (d->name == 0)
30746 continue;
30748 ftype = (enum ix86_builtin_func_type) d->flag;
30749 def_builtin_const (d->mask, d->name, ftype, d->code);
30752 /* pcmpestr[im] insns. */
30753 for (i = 0, d = bdesc_pcmpestr;
30754 i < ARRAY_SIZE (bdesc_pcmpestr);
30755 i++, d++)
30757 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30758 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30759 else
30760 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30761 def_builtin_const (d->mask, d->name, ftype, d->code);
30764 /* pcmpistr[im] insns. */
30765 for (i = 0, d = bdesc_pcmpistr;
30766 i < ARRAY_SIZE (bdesc_pcmpistr);
30767 i++, d++)
30769 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30770 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30771 else
30772 ftype = INT_FTYPE_V16QI_V16QI_INT;
30773 def_builtin_const (d->mask, d->name, ftype, d->code);
30776 /* comi/ucomi insns. */
30777 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30779 if (d->mask == OPTION_MASK_ISA_SSE2)
30780 ftype = INT_FTYPE_V2DF_V2DF;
30781 else
30782 ftype = INT_FTYPE_V4SF_V4SF;
30783 def_builtin_const (d->mask, d->name, ftype, d->code);
30786 /* SSE */
30787 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30788 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30789 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30790 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30792 /* SSE or 3DNow!A */
30793 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30794 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30795 IX86_BUILTIN_MASKMOVQ);
30797 /* SSE2 */
30798 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30799 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30801 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30802 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30803 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30804 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30806 /* SSE3. */
30807 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30808 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30809 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30810 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30812 /* AES */
30813 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30814 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30815 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30816 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30817 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30818 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30819 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30820 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30821 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30822 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30823 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30824 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30826 /* PCLMUL */
30827 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30828 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30830 /* RDRND */
30831 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30832 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30833 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30834 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30835 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30836 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30837 IX86_BUILTIN_RDRAND64_STEP);
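/* Usage sketch (assumption-labelled, mirroring the signatures registered
   above): each *_step builtin stores a hardware random number through its
   pointer argument and returns nonzero on success, e.g.

     unsigned int r;
     if (__builtin_ia32_rdrand32_step (&r))
       use (r);          (use is a hypothetical consumer)

   The <immintrin.h> wrappers _rdrand16_step, _rdrand32_step and
   _rdrand64_step are thin wrappers around these builtins.  */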
30839 /* AVX2 */
30840 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30841 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30842 IX86_BUILTIN_GATHERSIV2DF);
30844 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30845 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30846 IX86_BUILTIN_GATHERSIV4DF);
30848 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30849 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30850 IX86_BUILTIN_GATHERDIV2DF);
30852 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30853 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30854 IX86_BUILTIN_GATHERDIV4DF);
30856 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30857 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30858 IX86_BUILTIN_GATHERSIV4SF);
30860 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30861 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30862 IX86_BUILTIN_GATHERSIV8SF);
30864 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30865 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30866 IX86_BUILTIN_GATHERDIV4SF);
30868 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30869 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30870 IX86_BUILTIN_GATHERDIV8SF);
30872 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30873 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30874 IX86_BUILTIN_GATHERSIV2DI);
30876 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30877 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30878 IX86_BUILTIN_GATHERSIV4DI);
30880 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30881 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30882 IX86_BUILTIN_GATHERDIV2DI);
30884 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30885 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30886 IX86_BUILTIN_GATHERDIV4DI);
30888 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30889 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30890 IX86_BUILTIN_GATHERSIV4SI);
30892 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30893 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30894 IX86_BUILTIN_GATHERSIV8SI);
30896 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30897 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30898 IX86_BUILTIN_GATHERDIV4SI);
30900 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30901 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30902 IX86_BUILTIN_GATHERDIV8SI);
30904 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
30905 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30906 IX86_BUILTIN_GATHERALTSIV4DF);
30908 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
30909 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30910 IX86_BUILTIN_GATHERALTDIV8SF);
30912 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
30913 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30914 IX86_BUILTIN_GATHERALTSIV4DI);
30916 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
30917 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30918 IX86_BUILTIN_GATHERALTDIV8SI);
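/* Reading the gather prototypes above (a sketch, not normative): the operand
   order encoded in each FTYPE is (src, base pointer, index vector, mask,
   scale), so V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT corresponds to a call
   such as

     __builtin_ia32_gathersiv2df (src, base, idx, mask, 8);

   where elements whose mask sign bit is clear are taken from SRC instead of
   memory.  The <immintrin.h> _mm_i32gather_pd family of intrinsics expands
   to these builtins.  */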
30920 /* AVX512F */
30921 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30922 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30923 IX86_BUILTIN_GATHER3SIV16SF);
30925 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30926 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
30927 IX86_BUILTIN_GATHER3SIV8DF);
30929 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30930 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
30931 IX86_BUILTIN_GATHER3DIV16SF);
30933 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
30934 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
30935 IX86_BUILTIN_GATHER3DIV8DF);
30937 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
30938 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
30939 IX86_BUILTIN_GATHER3SIV16SI);
30941 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
30942 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
30943 IX86_BUILTIN_GATHER3SIV8DI);
30945 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
30946 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
30947 IX86_BUILTIN_GATHER3DIV16SI);
30949 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
30950 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
30951 IX86_BUILTIN_GATHER3DIV8DI);
30953 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
30954 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
30955 IX86_BUILTIN_GATHER3ALTSIV8DF);
30957 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
30958 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
30959 IX86_BUILTIN_GATHER3ALTDIV16SF);
30961 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
30962 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
30963 IX86_BUILTIN_GATHER3ALTSIV8DI);
30965 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
30966 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
30967 IX86_BUILTIN_GATHER3ALTDIV16SI);
30969 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
30970 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
30971 IX86_BUILTIN_SCATTERSIV16SF);
30973 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
30974 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
30975 IX86_BUILTIN_SCATTERSIV8DF);
30977 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
30978 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
30979 IX86_BUILTIN_SCATTERDIV16SF);
30981 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
30982 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
30983 IX86_BUILTIN_SCATTERDIV8DF);
30985 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
30986 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
30987 IX86_BUILTIN_SCATTERSIV16SI);
30989 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
30990 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
30991 IX86_BUILTIN_SCATTERSIV8DI);
30993 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
30994 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
30995 IX86_BUILTIN_SCATTERDIV16SI);
30997 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
30998 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
30999 IX86_BUILTIN_SCATTERDIV8DI);
31001 /* AVX512PF */
31002 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31003 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31004 IX86_BUILTIN_GATHERPFDPD);
31005 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31006 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31007 IX86_BUILTIN_GATHERPFDPS);
31008 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31009 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31010 IX86_BUILTIN_GATHERPFQPD);
31011 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31012 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31013 IX86_BUILTIN_GATHERPFQPS);
31014 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31015 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31016 IX86_BUILTIN_SCATTERPFDPD);
31017 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31018 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31019 IX86_BUILTIN_SCATTERPFDPS);
31020 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31021 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31022 IX86_BUILTIN_SCATTERPFQPD);
31023 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31024 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31025 IX86_BUILTIN_SCATTERPFQPS);
31027 /* SHA */
31028 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31029 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31030 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31031 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31032 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31033 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31034 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31035 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31036 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31037 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31038 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31039 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31040 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31041 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31043 /* RTM. */
31044 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31045 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31047 /* MMX access to the vec_init patterns. */
31048 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31049 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31051 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31052 V4HI_FTYPE_HI_HI_HI_HI,
31053 IX86_BUILTIN_VEC_INIT_V4HI);
31055 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31056 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31057 IX86_BUILTIN_VEC_INIT_V8QI);
31059 /* Access to the vec_extract patterns. */
31060 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31061 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31062 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31063 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31064 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31065 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31066 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31067 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31068 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31069 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31071 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31072 "__builtin_ia32_vec_ext_v4hi",
31073 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31075 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31076 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31078 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31079 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31081 /* Access to the vec_set patterns. */
31082 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31083 "__builtin_ia32_vec_set_v2di",
31084 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31086 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31087 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31089 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31090 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31092 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31093 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31095 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31096 "__builtin_ia32_vec_set_v4hi",
31097 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31099 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31100 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
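/* Sketch of how these generic element builtins are used (an assumption,
   based on the FTYPEs above): __builtin_ia32_vec_ext_* extracts one element
   by constant index and __builtin_ia32_vec_set_* returns a copy with one
   element replaced, e.g.

     float  f = __builtin_ia32_vec_ext_v4sf (v4, 0);
     __v8hi w = __builtin_ia32_vec_set_v8hi (v8, 42, 3);

   The extract/insert intrinsics in the emmintrin.h and smmintrin.h headers
   are built on top of these.  */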
31102 /* RDSEED */
31103 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31104 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31105 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31106 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31107 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31108 "__builtin_ia32_rdseed_di_step",
31109 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31111 /* ADCX */
31112 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31113 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31114 def_builtin (OPTION_MASK_ISA_64BIT,
31115 "__builtin_ia32_addcarryx_u64",
31116 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31117 IX86_BUILTIN_ADDCARRYX64);
31119 /* Read/write FLAGS. */
31120 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31121 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31122 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31123 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31124 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31125 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31126 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31127 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
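/* Usage sketch (illustrative only): the flags builtins read or replace the
   whole EFLAGS/RFLAGS register, e.g. on x86-64

     unsigned long long f = __builtin_ia32_readeflags_u64 ();
     __builtin_ia32_writeeflags_u64 (f);

   while the *_u32 variants are meant for 32-bit mode only (note the negated
   OPTION_MASK_ISA_64BIT mask above).  */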
31130 /* Add FMA4 multi-arg instructions. */
31131 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31133 if (d->name == 0)
31134 continue;
31136 ftype = (enum ix86_builtin_func_type) d->flag;
31137 def_builtin_const (d->mask, d->name, ftype, d->code);
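/* As the header comment of ix86_init_mmx_sse_builtins notes, every builtin
   is registered even when its ISA is not enabled on the command line, so a
   single translation unit can provide per-ISA code via the target attribute.
   A hedged user-level sketch (with <immintrin.h>):

     __attribute__ ((target ("avx2")))
     __m256i add256 (__m256i a, __m256i b)
     {
       return _mm256_add_epi32 (a, b);     (expands to an AVX2 builtin)
     }

   compiles even under plain -msse2, because the AVX2 builtin exists and the
   per-function ISA check happens at expansion time.  */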
31141 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31142 to return a pointer to VERSION_DECL if the outcome of the expression
31143 formed by PREDICATE_CHAIN is true. This function will be called during
31144 version dispatch to decide which function version to execute. It returns
31145 the basic block at the end, to which more conditions can be added. */
31147 static basic_block
31148 add_condition_to_bb (tree function_decl, tree version_decl,
31149 tree predicate_chain, basic_block new_bb)
31151 gimple return_stmt;
31152 tree convert_expr, result_var;
31153 gimple convert_stmt;
31154 gimple call_cond_stmt;
31155 gimple if_else_stmt;
31157 basic_block bb1, bb2, bb3;
31158 edge e12, e23;
31160 tree cond_var, and_expr_var = NULL_TREE;
31161 gimple_seq gseq;
31163 tree predicate_decl, predicate_arg;
31165 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31167 gcc_assert (new_bb != NULL);
31168 gseq = bb_seq (new_bb);
31171 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31172 build_fold_addr_expr (version_decl));
31173 result_var = create_tmp_var (ptr_type_node, NULL);
31174 convert_stmt = gimple_build_assign (result_var, convert_expr);
31175 return_stmt = gimple_build_return (result_var);
31177 if (predicate_chain == NULL_TREE)
31179 gimple_seq_add_stmt (&gseq, convert_stmt);
31180 gimple_seq_add_stmt (&gseq, return_stmt);
31181 set_bb_seq (new_bb, gseq);
31182 gimple_set_bb (convert_stmt, new_bb);
31183 gimple_set_bb (return_stmt, new_bb);
31184 pop_cfun ();
31185 return new_bb;
31188 while (predicate_chain != NULL)
31190 cond_var = create_tmp_var (integer_type_node, NULL);
31191 predicate_decl = TREE_PURPOSE (predicate_chain);
31192 predicate_arg = TREE_VALUE (predicate_chain);
31193 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31194 gimple_call_set_lhs (call_cond_stmt, cond_var);
31196 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31197 gimple_set_bb (call_cond_stmt, new_bb);
31198 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31200 predicate_chain = TREE_CHAIN (predicate_chain);
31202 if (and_expr_var == NULL)
31203 and_expr_var = cond_var;
31204 else
31206 gimple assign_stmt;
31207 /* Use MIN_EXPR to check whether any integer is zero:
31208 and_expr_var = min_expr <cond_var, and_expr_var>. */
31209 assign_stmt = gimple_build_assign (and_expr_var,
31210 build2 (MIN_EXPR, integer_type_node,
31211 cond_var, and_expr_var));
31213 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31214 gimple_set_bb (assign_stmt, new_bb);
31215 gimple_seq_add_stmt (&gseq, assign_stmt);
31219 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31220 integer_zero_node,
31221 NULL_TREE, NULL_TREE);
31222 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31223 gimple_set_bb (if_else_stmt, new_bb);
31224 gimple_seq_add_stmt (&gseq, if_else_stmt);
31226 gimple_seq_add_stmt (&gseq, convert_stmt);
31227 gimple_seq_add_stmt (&gseq, return_stmt);
31228 set_bb_seq (new_bb, gseq);
31230 bb1 = new_bb;
31231 e12 = split_block (bb1, if_else_stmt);
31232 bb2 = e12->dest;
31233 e12->flags &= ~EDGE_FALLTHRU;
31234 e12->flags |= EDGE_TRUE_VALUE;
31236 e23 = split_block (bb2, return_stmt);
31238 gimple_set_bb (convert_stmt, bb2);
31239 gimple_set_bb (return_stmt, bb2);
31241 bb3 = e23->dest;
31242 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31244 remove_edge (e23);
31245 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31247 pop_cfun ();
31249 return bb3;
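/* Shape of the code that add_condition_to_bb appends for one version
   (illustrative pseudo-GIMPLE; all names are made up):

     cond_1 = __builtin_cpu_is ("haswell");
     cond_2 = __builtin_cpu_supports ("avx2");
     and_1  = MIN_EXPR <cond_2, cond_1>;
     if (and_1 > 0)
       return (void *) &foo.arch_haswell_avx2;
     (otherwise fall through to the next version's condition)

   so the resolver tries versions in priority order and returns the address
   of the first one whose predicates all hold.  */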
31252 /* This parses the arguments of the "target" attribute in DECL and determines
31253 the right builtin to use to match the platform specification.
31254 It returns the priority value for this version decl. If PREDICATE_LIST
31255 is not NULL, it stores the list of cpu features that need to be checked
31256 before dispatching this function. */
31258 static unsigned int
31259 get_builtin_code_for_version (tree decl, tree *predicate_list)
31261 tree attrs;
31262 struct cl_target_option cur_target;
31263 tree target_node;
31264 struct cl_target_option *new_target;
31265 const char *arg_str = NULL;
31266 const char *attrs_str = NULL;
31267 char *tok_str = NULL;
31268 char *token;
31270 /* Priority of i386 features, greater value is higher priority. This is
31271 used to decide the order in which function dispatch must happen. For
31272 instance, a version specialized for SSE4.2 should be checked for dispatch
31273 before a version for SSE3, as SSE4.2 implies SSE3. */
31274 enum feature_priority
31276 P_ZERO = 0,
31277 P_MMX,
31278 P_SSE,
31279 P_SSE2,
31280 P_SSE3,
31281 P_SSSE3,
31282 P_PROC_SSSE3,
31283 P_SSE4_A,
31284 P_PROC_SSE4_A,
31285 P_SSE4_1,
31286 P_SSE4_2,
31287 P_PROC_SSE4_2,
31288 P_POPCNT,
31289 P_AVX,
31290 P_PROC_AVX,
31291 P_FMA4,
31292 P_XOP,
31293 P_PROC_XOP,
31294 P_FMA,
31295 P_PROC_FMA,
31296 P_AVX2,
31297 P_PROC_AVX2
31300 enum feature_priority priority = P_ZERO;
31302 /* These are the target attribute strings for which a dispatcher is
31303 available, from fold_builtin_cpu. */
31305 static struct _feature_list
31307 const char *const name;
31308 const enum feature_priority priority;
31310 const feature_list[] =
31312 {"mmx", P_MMX},
31313 {"sse", P_SSE},
31314 {"sse2", P_SSE2},
31315 {"sse3", P_SSE3},
31316 {"sse4a", P_SSE4_A},
31317 {"ssse3", P_SSSE3},
31318 {"sse4.1", P_SSE4_1},
31319 {"sse4.2", P_SSE4_2},
31320 {"popcnt", P_POPCNT},
31321 {"avx", P_AVX},
31322 {"fma4", P_FMA4},
31323 {"xop", P_XOP},
31324 {"fma", P_FMA},
31325 {"avx2", P_AVX2}
31329 static unsigned int NUM_FEATURES
31330 = sizeof (feature_list) / sizeof (struct _feature_list);
31332 unsigned int i;
31334 tree predicate_chain = NULL_TREE;
31335 tree predicate_decl, predicate_arg;
31337 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31338 gcc_assert (attrs != NULL);
31340 attrs = TREE_VALUE (TREE_VALUE (attrs));
31342 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31343 attrs_str = TREE_STRING_POINTER (attrs);
31345 /* Return priority zero for default function. */
31346 if (strcmp (attrs_str, "default") == 0)
31347 return 0;
31349 /* Handle arch= if specified. For priority, set it to be 1 more than
31350 the best instruction set the processor can handle. For instance, if
31351 there is a version for atom and a version for ssse3 (the highest ISA
31352 priority for atom), the atom version must be checked for dispatch
31353 before the ssse3 version. */
31354 if (strstr (attrs_str, "arch=") != NULL)
31356 cl_target_option_save (&cur_target, &global_options);
31357 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31358 &global_options_set);
31360 gcc_assert (target_node);
31361 new_target = TREE_TARGET_OPTION (target_node);
31362 gcc_assert (new_target);
31364 if (new_target->arch_specified && new_target->arch > 0)
31366 switch (new_target->arch)
31368 case PROCESSOR_CORE2:
31369 arg_str = "core2";
31370 priority = P_PROC_SSSE3;
31371 break;
31372 case PROCESSOR_NEHALEM:
31373 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31374 arg_str = "westmere";
31375 else
31376 /* We translate "arch=corei7" and "arch=nehalem" to
31377 "corei7" so that it will be mapped to M_INTEL_COREI7
31378 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31379 arg_str = "corei7";
31380 priority = P_PROC_SSE4_2;
31381 break;
31382 case PROCESSOR_SANDYBRIDGE:
31383 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31384 arg_str = "ivybridge";
31385 else
31386 arg_str = "sandybridge";
31387 priority = P_PROC_AVX;
31388 break;
31389 case PROCESSOR_HASWELL:
31390 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31391 arg_str = "broadwell";
31392 else
31393 arg_str = "haswell";
31394 priority = P_PROC_AVX2;
31395 break;
31396 case PROCESSOR_BONNELL:
31397 arg_str = "bonnell";
31398 priority = P_PROC_SSSE3;
31399 break;
31400 case PROCESSOR_SILVERMONT:
31401 arg_str = "silvermont";
31402 priority = P_PROC_SSE4_2;
31403 break;
31404 case PROCESSOR_AMDFAM10:
31405 arg_str = "amdfam10h";
31406 priority = P_PROC_SSE4_A;
31407 break;
31408 case PROCESSOR_BTVER1:
31409 arg_str = "btver1";
31410 priority = P_PROC_SSE4_A;
31411 break;
31412 case PROCESSOR_BTVER2:
31413 arg_str = "btver2";
31414 priority = P_PROC_AVX;
31415 break;
31416 case PROCESSOR_BDVER1:
31417 arg_str = "bdver1";
31418 priority = P_PROC_XOP;
31419 break;
31420 case PROCESSOR_BDVER2:
31421 arg_str = "bdver2";
31422 priority = P_PROC_FMA;
31423 break;
31424 case PROCESSOR_BDVER3:
31425 arg_str = "bdver3";
31426 priority = P_PROC_FMA;
31427 break;
31428 case PROCESSOR_BDVER4:
31429 arg_str = "bdver4";
31430 priority = P_PROC_AVX2;
31431 break;
31435 cl_target_option_restore (&global_options, &cur_target);
31437 if (predicate_list && arg_str == NULL)
31439 error_at (DECL_SOURCE_LOCATION (decl),
31440 "No dispatcher found for the versioning attributes");
31441 return 0;
31444 if (predicate_list)
31446 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31447 /* For a C string literal the length includes the trailing NULL. */
31448 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31449 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31450 predicate_chain);
31454 /* Process feature name. */
31455 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31456 strcpy (tok_str, attrs_str);
31457 token = strtok (tok_str, ",");
31458 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31460 while (token != NULL)
31462 /* Do not process "arch=" */
31463 if (strncmp (token, "arch=", 5) == 0)
31465 token = strtok (NULL, ",");
31466 continue;
31468 for (i = 0; i < NUM_FEATURES; ++i)
31470 if (strcmp (token, feature_list[i].name) == 0)
31472 if (predicate_list)
31474 predicate_arg = build_string_literal (
31475 strlen (feature_list[i].name) + 1,
31476 feature_list[i].name);
31477 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31478 predicate_chain);
31480 /* Find the maximum priority feature. */
31481 if (feature_list[i].priority > priority)
31482 priority = feature_list[i].priority;
31484 break;
31487 if (predicate_list && i == NUM_FEATURES)
31489 error_at (DECL_SOURCE_LOCATION (decl),
31490 "No dispatcher found for %s", token);
31491 return 0;
31493 token = strtok (NULL, ",");
31495 free (tok_str);
31497 if (predicate_list && predicate_chain == NULL_TREE)
31499 error_at (DECL_SOURCE_LOCATION (decl),
31500 "No dispatcher found for the versioning attributes : %s",
31501 attrs_str);
31502 return 0;
31504 else if (predicate_list)
31506 predicate_chain = nreverse (predicate_chain);
31507 *predicate_list = predicate_chain;
31510 return priority;
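/* Worked example (an assumption-labelled sketch, not normative): for a
   version declared as

     __attribute__ ((target ("arch=haswell,avx2"))) int foo (void);

   this routine returns priority P_PROC_AVX2 (the arch= entry dominates the
   avx2 feature here) and, when PREDICATE_LIST is non-null, stores a chain
   equivalent to

     __builtin_cpu_is ("haswell") && __builtin_cpu_supports ("avx2")

   which add_condition_to_bb later turns into resolver code.  */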
31513 /* This compares the priority of target features in function DECL1
31514 and DECL2. It returns positive value if DECL1 is higher priority,
31515 negative value if DECL2 is higher priority and 0 if they are the
31516 same. */
31518 static int
31519 ix86_compare_version_priority (tree decl1, tree decl2)
31521 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31522 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31524 return (int)priority1 - (int)priority2;
31527 /* V1 and V2 point to function versions with different priorities
31528 based on the target ISA. This function compares their priorities. */
31530 static int
31531 feature_compare (const void *v1, const void *v2)
31533 typedef struct _function_version_info
31535 tree version_decl;
31536 tree predicate_chain;
31537 unsigned int dispatch_priority;
31538 } function_version_info;
31540 const function_version_info c1 = *(const function_version_info *)v1;
31541 const function_version_info c2 = *(const function_version_info *)v2;
31542 return (c2.dispatch_priority - c1.dispatch_priority);
31545 /* This function generates the dispatch function for
31546 multi-versioned functions. DISPATCH_DECL is the function which will
31547 contain the dispatch logic. FNDECLS are the function choices for
31548 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31549 in DISPATCH_DECL in which the dispatch code is generated. */
31551 static int
31552 dispatch_function_versions (tree dispatch_decl,
31553 void *fndecls_p,
31554 basic_block *empty_bb)
31556 tree default_decl;
31557 gimple ifunc_cpu_init_stmt;
31558 gimple_seq gseq;
31559 int ix;
31560 tree ele;
31561 vec<tree> *fndecls;
31562 unsigned int num_versions = 0;
31563 unsigned int actual_versions = 0;
31564 unsigned int i;
31566 struct _function_version_info
31568 tree version_decl;
31569 tree predicate_chain;
31570 unsigned int dispatch_priority;
31571 }*function_version_info;
31573 gcc_assert (dispatch_decl != NULL
31574 && fndecls_p != NULL
31575 && empty_bb != NULL);
31577 /* fndecls_p is actually a vector. */
31578 fndecls = static_cast<vec<tree> *> (fndecls_p);
31580 /* At least one more version other than the default. */
31581 num_versions = fndecls->length ();
31582 gcc_assert (num_versions >= 2);
31584 function_version_info = (struct _function_version_info *)
31585 XNEWVEC (struct _function_version_info, (num_versions - 1));
31587 /* The first version in the vector is the default decl. */
31588 default_decl = (*fndecls)[0];
31590 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31592 gseq = bb_seq (*empty_bb);
31593 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31594 constructors, so explicitly call __builtin_cpu_init here. */
31595 ifunc_cpu_init_stmt = gimple_build_call_vec (
31596 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31597 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31598 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31599 set_bb_seq (*empty_bb, gseq);
31601 pop_cfun ();
31604 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31606 tree version_decl = ele;
31607 tree predicate_chain = NULL_TREE;
31608 unsigned int priority;
31609 /* Get attribute string, parse it and find the right predicate decl.
31610 The predicate function could be a lengthy combination of many
31611 features, like arch-type and various isa-variants. */
31612 priority = get_builtin_code_for_version (version_decl,
31613 &predicate_chain);
31615 if (predicate_chain == NULL_TREE)
31616 continue;
31618 function_version_info [actual_versions].version_decl = version_decl;
31619 function_version_info [actual_versions].predicate_chain
31620 = predicate_chain;
31621 function_version_info [actual_versions].dispatch_priority = priority;
31622 actual_versions++;
31625 /* Sort the versions in descending order of dispatch priority. The
31626 priority is based on the ISA. This is not a perfect solution: there
31627 could still be ambiguity. If more than one function version is suitable
31628 to execute, which one should be dispatched? In the future, allow the user
31629 to specify a dispatch priority next to the version. */
31630 qsort (function_version_info, actual_versions,
31631 sizeof (struct _function_version_info), feature_compare);
31633 for (i = 0; i < actual_versions; ++i)
31634 *empty_bb = add_condition_to_bb (dispatch_decl,
31635 function_version_info[i].version_decl,
31636 function_version_info[i].predicate_chain,
31637 *empty_bb);
31639 /* Dispatch the default version at the end. */
31640 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31641 NULL, *empty_bb);
31643 free (function_version_info);
31644 return 0;
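/* User-level shape of what the dispatcher built here implements (a sketch;
   function multiversioning via the target attribute is a C++ front-end
   feature in this GCC generation):

     __attribute__ ((target ("default"))) int foo () { return 0; }
     __attribute__ ((target ("sse4.2")))  int foo () { return 1; }
     __attribute__ ((target ("avx2")))    int foo () { return 2; }

   Calls to foo go through an ifunc whose resolver checks the versions in
   descending priority order (avx2 before sse4.2) and falls back to the
   default version added last.  */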
31647 /* Comparator function to be used in qsort routine to sort attribute
31648 specification strings to "target". */
31650 static int
31651 attr_strcmp (const void *v1, const void *v2)
31653 const char *c1 = *(char *const*)v1;
31654 const char *c2 = *(char *const*)v2;
31655 return strcmp (c1, c2);
31658 /* ARGLIST is the argument to target attribute. This function tokenizes
31659 the comma separated arguments, sorts them and returns a string which
31660 is a unique identifier for the comma separated arguments. It also
31661 replaces non-identifier characters "=,-" with "_". */
31663 static char *
31664 sorted_attr_string (tree arglist)
31666 tree arg;
31667 size_t str_len_sum = 0;
31668 char **args = NULL;
31669 char *attr_str, *ret_str;
31670 char *attr = NULL;
31671 unsigned int argnum = 1;
31672 unsigned int i;
31674 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31676 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31677 size_t len = strlen (str);
31678 str_len_sum += len + 1;
31679 if (arg != arglist)
31680 argnum++;
31681 for (i = 0; i < strlen (str); i++)
31682 if (str[i] == ',')
31683 argnum++;
31686 attr_str = XNEWVEC (char, str_len_sum);
31687 str_len_sum = 0;
31688 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31690 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31691 size_t len = strlen (str);
31692 memcpy (attr_str + str_len_sum, str, len);
31693 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31694 str_len_sum += len + 1;
31697 /* Replace "=,-" with "_". */
31698 for (i = 0; i < strlen (attr_str); i++)
31699 if (attr_str[i] == '=' || attr_str[i]== '-')
31700 attr_str[i] = '_';
31702 if (argnum == 1)
31703 return attr_str;
31705 args = XNEWVEC (char *, argnum);
31707 i = 0;
31708 attr = strtok (attr_str, ",");
31709 while (attr != NULL)
31711 args[i] = attr;
31712 i++;
31713 attr = strtok (NULL, ",");
31716 qsort (args, argnum, sizeof (char *), attr_strcmp);
31718 ret_str = XNEWVEC (char, str_len_sum);
31719 str_len_sum = 0;
31720 for (i = 0; i < argnum; i++)
31722 size_t len = strlen (args[i]);
31723 memcpy (ret_str + str_len_sum, args[i], len);
31724 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31725 str_len_sum += len + 1;
31728 XDELETEVEC (args);
31729 XDELETEVEC (attr_str);
31730 return ret_str;
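/* Worked example (illustrative): for __attribute__ ((target ("avx,arch=core2")))
   the arguments tokenize to "avx" and "arch=core2", '=' becomes '_', the
   tokens are sorted, and the function returns the canonical string

     "arch_core2_avx"

   so the same set of options always produces the same suffix regardless of
   the order the user wrote them in.  */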
31733 /* This function changes the assembler name for functions that are
31734 versions. If DECL is a function version and has a "target"
31735 attribute, it appends the attribute string to its assembler name. */
31737 static tree
31738 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31740 tree version_attr;
31741 const char *orig_name, *version_string;
31742 char *attr_str, *assembler_name;
31744 if (DECL_DECLARED_INLINE_P (decl)
31745 && lookup_attribute ("gnu_inline",
31746 DECL_ATTRIBUTES (decl)))
31747 error_at (DECL_SOURCE_LOCATION (decl),
31748 "Function versions cannot be marked as gnu_inline,"
31749 " bodies have to be generated");
31751 if (DECL_VIRTUAL_P (decl)
31752 || DECL_VINDEX (decl))
31753 sorry ("Virtual function multiversioning not supported");
31755 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31757 /* target attribute string cannot be NULL. */
31758 gcc_assert (version_attr != NULL_TREE);
31760 orig_name = IDENTIFIER_POINTER (id);
31761 version_string
31762 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31764 if (strcmp (version_string, "default") == 0)
31765 return id;
31767 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31768 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31770 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31772 /* Allow assembler name to be modified if already set. */
31773 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31774 SET_DECL_RTL (decl, NULL);
31776 tree ret = get_identifier (assembler_name);
31777 XDELETEVEC (attr_str);
31778 XDELETEVEC (assembler_name);
31779 return ret;
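/* Example of the resulting name (a sketch): a version of foo declared with
   __attribute__ ((target ("arch=core2"))) gets the sorted attribute suffix
   appended to its assembler name, e.g.

     _Z3foov  becomes  _Z3foov.arch_core2

   while the "default" version keeps the plain assembler name.  */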
31782 /* This function returns true if FN1 and FN2 are versions of the same function,
31783 that is, the target strings of the function decls are different. This assumes
31784 that FN1 and FN2 have the same signature. */
31786 static bool
31787 ix86_function_versions (tree fn1, tree fn2)
31789 tree attr1, attr2;
31790 char *target1, *target2;
31791 bool result;
31793 if (TREE_CODE (fn1) != FUNCTION_DECL
31794 || TREE_CODE (fn2) != FUNCTION_DECL)
31795 return false;
31797 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31798 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31800 /* At least one function decl should have the target attribute specified. */
31801 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31802 return false;
31804 /* Diagnose missing target attribute if one of the decls is already
31805 multi-versioned. */
31806 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31808 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31810 if (attr2 != NULL_TREE)
31812 tree tem = fn1;
31813 fn1 = fn2;
31814 fn2 = tem;
31815 attr1 = attr2;
31817 error_at (DECL_SOURCE_LOCATION (fn2),
31818 "missing %<target%> attribute for multi-versioned %D",
31819 fn2);
31820 inform (DECL_SOURCE_LOCATION (fn1),
31821 "previous declaration of %D", fn1);
31822 /* Prevent diagnosing of the same error multiple times. */
31823 DECL_ATTRIBUTES (fn2)
31824 = tree_cons (get_identifier ("target"),
31825 copy_node (TREE_VALUE (attr1)),
31826 DECL_ATTRIBUTES (fn2));
31828 return false;
31831 target1 = sorted_attr_string (TREE_VALUE (attr1));
31832 target2 = sorted_attr_string (TREE_VALUE (attr2));
31834 /* The sorted target strings must be different for fn1 and fn2
31835 to be versions. */
31836 if (strcmp (target1, target2) == 0)
31837 result = false;
31838 else
31839 result = true;
31841 XDELETEVEC (target1);
31842 XDELETEVEC (target2);
31844 return result;
31847 static tree
31848 ix86_mangle_decl_assembler_name (tree decl, tree id)
31850 /* For function version, add the target suffix to the assembler name. */
31851 if (TREE_CODE (decl) == FUNCTION_DECL
31852 && DECL_FUNCTION_VERSIONED (decl))
31853 id = ix86_mangle_function_version_assembler_name (decl, id);
31854 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31855 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31856 #endif
31858 return id;
31861 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31862 is true, append the full path name of the source file. */
31864 static char *
31865 make_name (tree decl, const char *suffix, bool make_unique)
31867 char *global_var_name;
31868 int name_len;
31869 const char *name;
31870 const char *unique_name = NULL;
31872 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31874 /* Get a unique name that can be used globally without any chances
31875 of collision at link time. */
31876 if (make_unique)
31877 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31879 name_len = strlen (name) + strlen (suffix) + 2;
31881 if (make_unique)
31882 name_len += strlen (unique_name) + 1;
31883 global_var_name = XNEWVEC (char, name_len);
31885 /* Use '.' to concatenate names as it is demangler friendly. */
31886 if (make_unique)
31887 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31888 suffix);
31889 else
31890 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31892 return global_var_name;
31895 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31897 /* Make a dispatcher declaration for the multi-versioned function DECL.
31898 Calls to DECL function will be replaced with calls to the dispatcher
31899 by the front-end. Return the decl created. */
31901 static tree
31902 make_dispatcher_decl (const tree decl)
31904 tree func_decl;
31905 char *func_name;
31906 tree fn_type, func_type;
31907 bool is_uniq = false;
31909 if (TREE_PUBLIC (decl) == 0)
31910 is_uniq = true;
31912 func_name = make_name (decl, "ifunc", is_uniq);
31914 fn_type = TREE_TYPE (decl);
31915 func_type = build_function_type (TREE_TYPE (fn_type),
31916 TYPE_ARG_TYPES (fn_type));
31918 func_decl = build_fn_decl (func_name, func_type);
31919 XDELETEVEC (func_name);
31920 TREE_USED (func_decl) = 1;
31921 DECL_CONTEXT (func_decl) = NULL_TREE;
31922 DECL_INITIAL (func_decl) = error_mark_node;
31923 DECL_ARTIFICIAL (func_decl) = 1;
31924 /* Mark this function as external; the resolver will flip it again
31925 if it gets generated. */
31926 DECL_EXTERNAL (func_decl) = 1;
31927 /* IFUNCs have to be externally visible. */
31928 TREE_PUBLIC (func_decl) = 1;
31930 return func_decl;
31933 #endif
31935 /* Returns true if DECL is multi-versioned and is the default function,
31936 that is, it is not tagged with a target-specific optimization. */
31938 static bool
31939 is_function_default_version (const tree decl)
31941 if (TREE_CODE (decl) != FUNCTION_DECL
31942 || !DECL_FUNCTION_VERSIONED (decl))
31943 return false;
31944 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31945 gcc_assert (attr);
31946 attr = TREE_VALUE (TREE_VALUE (attr));
31947 return (TREE_CODE (attr) == STRING_CST
31948 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
31951 /* Make a dispatcher declaration for the multi-versioned function DECL.
31952 Calls to DECL function will be replaced with calls to the dispatcher
31953 by the front-end. Returns the decl of the dispatcher function. */
31955 static tree
31956 ix86_get_function_versions_dispatcher (void *decl)
31958 tree fn = (tree) decl;
31959 struct cgraph_node *node = NULL;
31960 struct cgraph_node *default_node = NULL;
31961 struct cgraph_function_version_info *node_v = NULL;
31962 struct cgraph_function_version_info *first_v = NULL;
31964 tree dispatch_decl = NULL;
31966 struct cgraph_function_version_info *default_version_info = NULL;
31968 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31970 node = cgraph_get_node (fn);
31971 gcc_assert (node != NULL);
31973 node_v = get_cgraph_node_version (node);
31974 gcc_assert (node_v != NULL);
31976 if (node_v->dispatcher_resolver != NULL)
31977 return node_v->dispatcher_resolver;
31979 /* Find the default version and make it the first node. */
31980 first_v = node_v;
31981 /* Go to the beginning of the chain. */
31982 while (first_v->prev != NULL)
31983 first_v = first_v->prev;
31984 default_version_info = first_v;
31985 while (default_version_info != NULL)
31987 if (is_function_default_version
31988 (default_version_info->this_node->decl))
31989 break;
31990 default_version_info = default_version_info->next;
31993 /* If there is no default node, just return NULL. */
31994 if (default_version_info == NULL)
31995 return NULL;
31997 /* Make default info the first node. */
31998 if (first_v != default_version_info)
32000 default_version_info->prev->next = default_version_info->next;
32001 if (default_version_info->next)
32002 default_version_info->next->prev = default_version_info->prev;
32003 first_v->prev = default_version_info;
32004 default_version_info->next = first_v;
32005 default_version_info->prev = NULL;
32008 default_node = default_version_info->this_node;
32010 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32011 if (targetm.has_ifunc_p ())
32013 struct cgraph_function_version_info *it_v = NULL;
32014 struct cgraph_node *dispatcher_node = NULL;
32015 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32017 /* Right now, the dispatching is done via ifunc. */
32018 dispatch_decl = make_dispatcher_decl (default_node->decl);
32020 dispatcher_node = cgraph_get_create_node (dispatch_decl);
32021 gcc_assert (dispatcher_node != NULL);
32022 dispatcher_node->dispatcher_function = 1;
32023 dispatcher_version_info
32024 = insert_new_cgraph_node_version (dispatcher_node);
32025 dispatcher_version_info->next = default_version_info;
32026 dispatcher_node->definition = 1;
32028 /* Set the dispatcher for all the versions. */
32029 it_v = default_version_info;
32030 while (it_v != NULL)
32032 it_v->dispatcher_resolver = dispatch_decl;
32033 it_v = it_v->next;
32036 else
32037 #endif
32039 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32040 "multiversioning needs ifunc which is not supported "
32041 "on this target");
32044 return dispatch_decl;
32047 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32048 it to CHAIN. */
32050 static tree
32051 make_attribute (const char *name, const char *arg_name, tree chain)
32053 tree attr_name;
32054 tree attr_arg_name;
32055 tree attr_args;
32056 tree attr;
32058 attr_name = get_identifier (name);
32059 attr_arg_name = build_string (strlen (arg_name), arg_name);
32060 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32061 attr = tree_cons (attr_name, attr_args, chain);
32062 return attr;
32065 /* Make the resolver function decl to dispatch the versions of
32066 a multi-versioned function, DEFAULT_DECL. Create an
32067 empty basic block in the resolver and store the pointer in
32068 EMPTY_BB. Return the decl of the resolver function. */
32070 static tree
32071 make_resolver_func (const tree default_decl,
32072 const tree dispatch_decl,
32073 basic_block *empty_bb)
32075 char *resolver_name;
32076 tree decl, type, decl_name, t;
32077 bool is_uniq = false;
32079 /* IFUNCs have to be globally visible. So, if the default_decl is
32080 not, then the name of the IFUNC should be made unique. */
32081 if (TREE_PUBLIC (default_decl) == 0)
32082 is_uniq = true;
32084 /* Append the filename to the resolver function if the versions are
32085 not externally visible. This is because the resolver function has
32086 to be externally visible for the loader to find it. So, appending
32087 the filename will prevent conflicts with a resolver function from
32088 another module which is based on the same version name. */
32089 resolver_name = make_name (default_decl, "resolver", is_uniq);
32091 /* The resolver function should return a (void *). */
32092 type = build_function_type_list (ptr_type_node, NULL_TREE);
32094 decl = build_fn_decl (resolver_name, type);
32095 decl_name = get_identifier (resolver_name);
32096 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32098 DECL_NAME (decl) = decl_name;
32099 TREE_USED (decl) = 1;
32100 DECL_ARTIFICIAL (decl) = 1;
32101 DECL_IGNORED_P (decl) = 0;
32102 /* IFUNC resolvers have to be externally visible. */
32103 TREE_PUBLIC (decl) = 1;
32104 DECL_UNINLINABLE (decl) = 1;
32106 /* The resolver is not external; its body is generated. */
32107 DECL_EXTERNAL (decl) = 0;
32108 DECL_EXTERNAL (dispatch_decl) = 0;
32110 DECL_CONTEXT (decl) = NULL_TREE;
32111 DECL_INITIAL (decl) = make_node (BLOCK);
32112 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32114 if (DECL_COMDAT_GROUP (default_decl)
32115 || TREE_PUBLIC (default_decl))
32117 /* In this case, each translation unit with a call to this
32118 versioned function will put out a resolver. Ensure it
32119 is comdat to keep just one copy. */
32120 DECL_COMDAT (decl) = 1;
32121 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32123 /* Build result decl and add to function_decl. */
32124 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32125 DECL_ARTIFICIAL (t) = 1;
32126 DECL_IGNORED_P (t) = 1;
32127 DECL_RESULT (decl) = t;
32129 gimplify_function_tree (decl);
32130 push_cfun (DECL_STRUCT_FUNCTION (decl));
32131 *empty_bb = init_lowered_empty_function (decl, false);
32133 cgraph_add_new_function (decl, true);
32134 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
32136 pop_cfun ();
32138 gcc_assert (dispatch_decl != NULL);
32139 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32140 DECL_ATTRIBUTES (dispatch_decl)
32141 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32143 /* Create the alias for dispatch to resolver here. */
32144 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32145 cgraph_same_body_alias (NULL, dispatch_decl, decl);
32146 XDELETEVEC (resolver_name);
32147 return decl;
32150 /* Generate the dispatching code body to dispatch multi-versioned function
32151 DECL. The target hook is called to process the "target" attributes and
32152 provide the code to dispatch the right function at run-time. NODE points
32153 to the dispatcher decl whose body will be created. */
32155 static tree
32156 ix86_generate_version_dispatcher_body (void *node_p)
32158 tree resolver_decl;
32159 basic_block empty_bb;
32160 tree default_ver_decl;
32161 struct cgraph_node *versn;
32162 struct cgraph_node *node;
32164 struct cgraph_function_version_info *node_version_info = NULL;
32165 struct cgraph_function_version_info *versn_info = NULL;
32167 node = (cgraph_node *)node_p;
32169 node_version_info = get_cgraph_node_version (node);
32170 gcc_assert (node->dispatcher_function
32171 && node_version_info != NULL);
32173 if (node_version_info->dispatcher_resolver)
32174 return node_version_info->dispatcher_resolver;
32176 /* The first version in the chain corresponds to the default version. */
32177 default_ver_decl = node_version_info->next->this_node->decl;
32179 /* node is going to be an alias, so remove the finalized bit. */
32180 node->definition = false;
32182 resolver_decl = make_resolver_func (default_ver_decl,
32183 node->decl, &empty_bb);
32185 node_version_info->dispatcher_resolver = resolver_decl;
32187 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32189 auto_vec<tree, 2> fn_ver_vec;
32191 for (versn_info = node_version_info->next; versn_info;
32192 versn_info = versn_info->next)
32194 versn = versn_info->this_node;
32195 /* Check for virtual functions here again, as by this time it should
32196 have been determined if this function needs a vtable index or
32197 not. This happens for methods in derived classes that override
32198 virtual methods in base classes but are not explicitly marked as
32199 virtual. */
32200 if (DECL_VINDEX (versn->decl))
32201 sorry ("Virtual function multiversioning not supported");
32203 fn_ver_vec.safe_push (versn->decl);
32206 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32207 rebuild_cgraph_edges ();
32208 pop_cfun ();
32209 return resolver_decl;
32211 /* This builds the processor_model struct type defined in
32212 libgcc/config/i386/cpuinfo.c. */
32214 static tree
32215 build_processor_model_struct (void)
32217 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32218 "__cpu_features"};
32219 tree field = NULL_TREE, field_chain = NULL_TREE;
32220 int i;
32221 tree type = make_node (RECORD_TYPE);
32223 /* The first 3 fields are unsigned int. */
32224 for (i = 0; i < 3; ++i)
32226 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32227 get_identifier (field_name[i]), unsigned_type_node);
32228 if (field_chain != NULL_TREE)
32229 DECL_CHAIN (field) = field_chain;
32230 field_chain = field;
32233 /* The last field is an array of unsigned integers of size one. */
32234 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32235 get_identifier (field_name[3]),
32236 build_array_type (unsigned_type_node,
32237 build_index_type (size_one_node)));
32238 if (field_chain != NULL_TREE)
32239 DECL_CHAIN (field) = field_chain;
32240 field_chain = field;
32242 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32243 return type;
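/* Illustrative sketch, not part of this file: the RECORD_TYPE built above
   is intended to match this layout from libgcc/config/i386/cpuinfo.c
   (reproduced here only to make the field chain easier to follow).  */

struct __processor_model
{
  unsigned int __cpu_vendor;
  unsigned int __cpu_type;
  unsigned int __cpu_subtype;
  unsigned int __cpu_features[1];
};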
32246 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32248 static tree
32249 make_var_decl (tree type, const char *name)
32251 tree new_decl;
32253 new_decl = build_decl (UNKNOWN_LOCATION,
32254 VAR_DECL,
32255 get_identifier(name),
32256 type);
32258 DECL_EXTERNAL (new_decl) = 1;
32259 TREE_STATIC (new_decl) = 1;
32260 TREE_PUBLIC (new_decl) = 1;
32261 DECL_INITIAL (new_decl) = 0;
32262 DECL_ARTIFICIAL (new_decl) = 0;
32263 DECL_PRESERVE_P (new_decl) = 1;
32265 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32266 assemble_variable (new_decl, 0, 0, 0);
32268 return new_decl;
32271 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32272 into a check against the __cpu_model data defined in libgcc/config/i386/cpuinfo.c. */
32274 static tree
32275 fold_builtin_cpu (tree fndecl, tree *args)
32277 unsigned int i;
32278 enum ix86_builtins fn_code = (enum ix86_builtins)
32279 DECL_FUNCTION_CODE (fndecl);
32280 tree param_string_cst = NULL;
32282 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32283 enum processor_features
32285 F_CMOV = 0,
32286 F_MMX,
32287 F_POPCNT,
32288 F_SSE,
32289 F_SSE2,
32290 F_SSE3,
32291 F_SSSE3,
32292 F_SSE4_1,
32293 F_SSE4_2,
32294 F_AVX,
32295 F_AVX2,
32296 F_SSE4_A,
32297 F_FMA4,
32298 F_XOP,
32299 F_FMA,
32300 F_MAX
32303 /* These are the values for vendor types and cpu types and subtypes
32304 in cpuinfo.c. Cpu types and subtypes should have the corresponding
32305 start value subtracted before use. */
32306 enum processor_model
32308 M_INTEL = 1,
32309 M_AMD,
32310 M_CPU_TYPE_START,
32311 M_INTEL_BONNELL,
32312 M_INTEL_CORE2,
32313 M_INTEL_COREI7,
32314 M_AMDFAM10H,
32315 M_AMDFAM15H,
32316 M_INTEL_SILVERMONT,
32317 M_AMD_BTVER1,
32318 M_AMD_BTVER2,
32319 M_CPU_SUBTYPE_START,
32320 M_INTEL_COREI7_NEHALEM,
32321 M_INTEL_COREI7_WESTMERE,
32322 M_INTEL_COREI7_SANDYBRIDGE,
32323 M_AMDFAM10H_BARCELONA,
32324 M_AMDFAM10H_SHANGHAI,
32325 M_AMDFAM10H_ISTANBUL,
32326 M_AMDFAM15H_BDVER1,
32327 M_AMDFAM15H_BDVER2,
32328 M_AMDFAM15H_BDVER3,
32329 M_AMDFAM15H_BDVER4,
32330 M_INTEL_COREI7_IVYBRIDGE,
32331 M_INTEL_COREI7_HASWELL
32334 static struct _arch_names_table
32336 const char *const name;
32337 const enum processor_model model;
32339 const arch_names_table[] =
32341 {"amd", M_AMD},
32342 {"intel", M_INTEL},
32343 {"atom", M_INTEL_BONNELL},
32344 {"slm", M_INTEL_SILVERMONT},
32345 {"core2", M_INTEL_CORE2},
32346 {"corei7", M_INTEL_COREI7},
32347 {"nehalem", M_INTEL_COREI7_NEHALEM},
32348 {"westmere", M_INTEL_COREI7_WESTMERE},
32349 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32350 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32351 {"haswell", M_INTEL_COREI7_HASWELL},
32352 {"bonnell", M_INTEL_BONNELL},
32353 {"silvermont", M_INTEL_SILVERMONT},
32354 {"amdfam10h", M_AMDFAM10H},
32355 {"barcelona", M_AMDFAM10H_BARCELONA},
32356 {"shanghai", M_AMDFAM10H_SHANGHAI},
32357 {"istanbul", M_AMDFAM10H_ISTANBUL},
32358 {"btver1", M_AMD_BTVER1},
32359 {"amdfam15h", M_AMDFAM15H},
32360 {"bdver1", M_AMDFAM15H_BDVER1},
32361 {"bdver2", M_AMDFAM15H_BDVER2},
32362 {"bdver3", M_AMDFAM15H_BDVER3},
32363 {"bdver4", M_AMDFAM15H_BDVER4},
32364 {"btver2", M_AMD_BTVER2},
32367 static struct _isa_names_table
32369 const char *const name;
32370 const enum processor_features feature;
32372 const isa_names_table[] =
32374 {"cmov", F_CMOV},
32375 {"mmx", F_MMX},
32376 {"popcnt", F_POPCNT},
32377 {"sse", F_SSE},
32378 {"sse2", F_SSE2},
32379 {"sse3", F_SSE3},
32380 {"ssse3", F_SSSE3},
32381 {"sse4a", F_SSE4_A},
32382 {"sse4.1", F_SSE4_1},
32383 {"sse4.2", F_SSE4_2},
32384 {"avx", F_AVX},
32385 {"fma4", F_FMA4},
32386 {"xop", F_XOP},
32387 {"fma", F_FMA},
32388 {"avx2", F_AVX2}
32391 tree __processor_model_type = build_processor_model_struct ();
32392 tree __cpu_model_var = make_var_decl (__processor_model_type,
32393 "__cpu_model");
32396 varpool_add_new_variable (__cpu_model_var);
32398 gcc_assert ((args != NULL) && (*args != NULL));
32400 param_string_cst = *args;
32401 while (param_string_cst
32402 && TREE_CODE (param_string_cst) != STRING_CST)
32404 /* *args must be an expr that can contain other EXPRs leading to a
32405 STRING_CST. */
32406 if (!EXPR_P (param_string_cst))
32408 error ("Parameter to builtin must be a string constant or literal");
32409 return integer_zero_node;
32411 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32414 gcc_assert (param_string_cst);
32416 if (fn_code == IX86_BUILTIN_CPU_IS)
32418 tree ref;
32419 tree field;
32420 tree final;
32422 unsigned int field_val = 0;
32423 unsigned int NUM_ARCH_NAMES
32424 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32426 for (i = 0; i < NUM_ARCH_NAMES; i++)
32427 if (strcmp (arch_names_table[i].name,
32428 TREE_STRING_POINTER (param_string_cst)) == 0)
32429 break;
32431 if (i == NUM_ARCH_NAMES)
32433 error ("Parameter to builtin not valid: %s",
32434 TREE_STRING_POINTER (param_string_cst));
32435 return integer_zero_node;
32438 field = TYPE_FIELDS (__processor_model_type);
32439 field_val = arch_names_table[i].model;
32441 /* CPU types are stored in the next field. */
32442 if (field_val > M_CPU_TYPE_START
32443 && field_val < M_CPU_SUBTYPE_START)
32445 field = DECL_CHAIN (field);
32446 field_val -= M_CPU_TYPE_START;
32449 /* CPU subtypes are stored in the next field. */
32450 if (field_val > M_CPU_SUBTYPE_START)
32452 field = DECL_CHAIN ( DECL_CHAIN (field));
32453 field_val -= M_CPU_SUBTYPE_START;
32456 /* Get the appropriate field in __cpu_model. */
32457 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32458 field, NULL_TREE);
32460 /* Check the value. */
32461 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32462 build_int_cstu (unsigned_type_node, field_val));
32463 return build1 (CONVERT_EXPR, integer_type_node, final);
32465 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32467 tree ref;
32468 tree array_elt;
32469 tree field;
32470 tree final;
32472 unsigned int field_val = 0;
32473 unsigned int NUM_ISA_NAMES
32474 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32476 for (i = 0; i < NUM_ISA_NAMES; i++)
32477 if (strcmp (isa_names_table[i].name,
32478 TREE_STRING_POINTER (param_string_cst)) == 0)
32479 break;
32481 if (i == NUM_ISA_NAMES)
32483 error ("Parameter to builtin not valid: %s",
32484 TREE_STRING_POINTER (param_string_cst));
32485 return integer_zero_node;
32488 field = TYPE_FIELDS (__processor_model_type);
32489 /* Get the last field, which is __cpu_features. */
32490 while (DECL_CHAIN (field))
32491 field = DECL_CHAIN (field);
32493 /* Get the appropriate field: __cpu_model.__cpu_features */
32494 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32495 field, NULL_TREE);
32497 /* Access the 0th element of __cpu_features array. */
32498 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32499 integer_zero_node, NULL_TREE, NULL_TREE);
32501 field_val = (1 << isa_names_table[i].feature);
32502 /* Return __cpu_model.__cpu_features[0] & field_val */
32503 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32504 build_int_cstu (unsigned_type_node, field_val));
32505 return build1 (CONVERT_EXPR, integer_type_node, final);
32507 gcc_unreachable ();
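/* Illustrative sketch, not part of this file: written out as C, the trees
   built by fold_builtin_cpu behave like the helpers below.  They rely on
   the __processor_model layout sketched after build_processor_model_struct
   above; the numeric constants come from the local enums in this function
   (F_AVX2 == 10, M_INTEL_COREI7_HASWELL - M_CPU_SUBTYPE_START == 24 - 12
   == 12).  */

extern struct __processor_model __cpu_model;

/* __builtin_cpu_is ("haswell") folds to a compare of the subtype field
   against the value biased by M_CPU_SUBTYPE_START.  */
static int
folded_cpu_is_haswell (void)
{
  return (int) (__cpu_model.__cpu_subtype == 12);
}

/* __builtin_cpu_supports ("avx2") folds to a feature-bit test; a nonzero
   result means the feature is present.  */
static int
folded_cpu_supports_avx2 (void)
{
  return (int) (__cpu_model.__cpu_features[0] & (1u << 10));
}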
32510 static tree
32511 ix86_fold_builtin (tree fndecl, int n_args,
32512 tree *args, bool ignore ATTRIBUTE_UNUSED)
32514 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32516 enum ix86_builtins fn_code = (enum ix86_builtins)
32517 DECL_FUNCTION_CODE (fndecl);
32518 if (fn_code == IX86_BUILTIN_CPU_IS
32519 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32521 gcc_assert (n_args == 1);
32522 return fold_builtin_cpu (fndecl, args);
32526 #ifdef SUBTARGET_FOLD_BUILTIN
32527 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32528 #endif
32530 return NULL_TREE;
32533 /* Make builtins to detect cpu type and features supported. NAME is
32534 the builtin name, CODE is the builtin code, and FTYPE is the function
32535 type of the builtin. */
32537 static void
32538 make_cpu_type_builtin (const char* name, int code,
32539 enum ix86_builtin_func_type ftype, bool is_const)
32541 tree decl;
32542 tree type;
32544 type = ix86_get_builtin_func_type (ftype);
32545 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32546 NULL, NULL_TREE);
32547 gcc_assert (decl != NULL_TREE);
32548 ix86_builtins[(int) code] = decl;
32549 TREE_READONLY (decl) = is_const;
32552 /* Make builtins to get CPU type and features supported. The created
32553 builtins are:
32555 __builtin_cpu_init (), to detect cpu type and features,
32556 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32557 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>. */
32560 static void
32561 ix86_init_platform_type_builtins (void)
32563 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32564 INT_FTYPE_VOID, false);
32565 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32566 INT_FTYPE_PCCHAR, true);
32567 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32568 INT_FTYPE_PCCHAR, true);
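/* Illustrative user-level sketch, not part of this file: how the three
   builtins registered above are meant to be used together.  The names
   memcpy_avx2 and memcpy_generic are hypothetical.  */

extern void *memcpy_avx2 (void *, const void *, unsigned long);
extern void *memcpy_generic (void *, const void *, unsigned long);

typedef void *(*memcpy_fn) (void *, const void *, unsigned long);

static memcpy_fn
pick_memcpy (void)
{
  __builtin_cpu_init ();                   /* fill in __cpu_model once */
  if (__builtin_cpu_is ("intel") && __builtin_cpu_supports ("avx2"))
    return memcpy_avx2;
  return memcpy_generic;
}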
32571 /* Internal method for ix86_init_builtins. */
32573 static void
32574 ix86_init_builtins_va_builtins_abi (void)
32576 tree ms_va_ref, sysv_va_ref;
32577 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32578 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32579 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32580 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32582 if (!TARGET_64BIT)
32583 return;
32584 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32585 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32586 ms_va_ref = build_reference_type (ms_va_list_type_node);
32587 sysv_va_ref =
32588 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32590 fnvoid_va_end_ms =
32591 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32592 fnvoid_va_start_ms =
32593 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32594 fnvoid_va_end_sysv =
32595 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32596 fnvoid_va_start_sysv =
32597 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32598 NULL_TREE);
32599 fnvoid_va_copy_ms =
32600 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32601 NULL_TREE);
32602 fnvoid_va_copy_sysv =
32603 build_function_type_list (void_type_node, sysv_va_ref,
32604 sysv_va_ref, NULL_TREE);
32606 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32607 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32608 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32609 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32610 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32611 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32612 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32613 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32614 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32615 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32616 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32617 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
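/* Illustrative sketch, not part of this file: an ms_abi varargs function
   on x86-64 uses the __builtin_ms_va_* interface registered above, just as
   a sysv_abi function uses __builtin_sysv_va_*.  */

static int __attribute__ ((ms_abi))
sum_ints_ms (int count, ...)
{
  __builtin_ms_va_list ap;
  int i, total = 0;

  __builtin_ms_va_start (ap, count);
  for (i = 0; i < count; i++)
    total += __builtin_va_arg (ap, int);
  __builtin_ms_va_end (ap);
  return total;
}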
32620 static void
32621 ix86_init_builtin_types (void)
32623 tree float128_type_node, float80_type_node;
32625 /* The __float80 type. */
32626 float80_type_node = long_double_type_node;
32627 if (TYPE_MODE (float80_type_node) != XFmode)
32629 /* The __float80 type. */
32630 float80_type_node = make_node (REAL_TYPE);
32632 TYPE_PRECISION (float80_type_node) = 80;
32633 layout_type (float80_type_node);
32635 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32637 /* The __float128 type. */
32638 float128_type_node = make_node (REAL_TYPE);
32639 TYPE_PRECISION (float128_type_node) = 128;
32640 layout_type (float128_type_node);
32641 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32643 /* This macro is built by i386-builtin-types.awk. */
32644 DEFINE_BUILTIN_PRIMITIVE_TYPES;
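/* Illustrative sketch, not part of this file: the two types registered
   above are directly usable from C on x86 targets that provide them.  */

static __float128
extended_then_quad (double x)
{
  __float80 t = (__float80) x / 3.0;    /* 80-bit x87 extended precision */
  return (__float128) t;                /* 128-bit software quad precision */
}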
32647 static void
32648 ix86_init_builtins (void)
32650 tree t;
32652 ix86_init_builtin_types ();
32654 /* Builtins to get CPU type and features. */
32655 ix86_init_platform_type_builtins ();
32657 /* TFmode support builtins. */
32658 def_builtin_const (0, "__builtin_infq",
32659 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32660 def_builtin_const (0, "__builtin_huge_valq",
32661 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32663 /* We will expand them to normal call if SSE isn't available since
32664 they are used by libgcc. */
32665 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32666 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32667 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32668 TREE_READONLY (t) = 1;
32669 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32671 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32672 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32673 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32674 TREE_READONLY (t) = 1;
32675 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32677 ix86_init_tm_builtins ();
32678 ix86_init_mmx_sse_builtins ();
32680 if (TARGET_LP64)
32681 ix86_init_builtins_va_builtins_abi ();
32683 #ifdef SUBTARGET_INIT_BUILTINS
32684 SUBTARGET_INIT_BUILTINS;
32685 #endif
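/* Illustrative sketch, not part of this file: the TFmode builtins
   registered above.  When SSE is unavailable they become calls to the
   libgcc routines named in the registrations (__fabstf2, __copysigntf3).  */

static __float128
quad_signed_infinity (__float128 sign_source)
{
  /* Infinite magnitude with the sign of SIGN_SOURCE.  */
  return __builtin_copysignq (__builtin_infq (), sign_source);
}

static __float128
quad_abs_diff (__float128 a, __float128 b)
{
  return __builtin_fabsq (a - b);
}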
32688 /* Return the ix86 builtin for CODE. */
32690 static tree
32691 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
32693 if (code >= IX86_BUILTIN_MAX)
32694 return error_mark_node;
32696 return ix86_builtins[code];
32699 /* Errors in the source file can cause expand_expr to return const0_rtx
32700 where we expect a vector. To avoid crashing, use one of the vector
32701 clear instructions. */
32702 static rtx
32703 safe_vector_operand (rtx x, enum machine_mode mode)
32705 if (x == const0_rtx)
32706 x = CONST0_RTX (mode);
32707 return x;
32710 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32712 static rtx
32713 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32715 rtx pat;
32716 tree arg0 = CALL_EXPR_ARG (exp, 0);
32717 tree arg1 = CALL_EXPR_ARG (exp, 1);
32718 rtx op0 = expand_normal (arg0);
32719 rtx op1 = expand_normal (arg1);
32720 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32721 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32722 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32724 if (VECTOR_MODE_P (mode0))
32725 op0 = safe_vector_operand (op0, mode0);
32726 if (VECTOR_MODE_P (mode1))
32727 op1 = safe_vector_operand (op1, mode1);
32729 if (optimize || !target
32730 || GET_MODE (target) != tmode
32731 || !insn_data[icode].operand[0].predicate (target, tmode))
32732 target = gen_reg_rtx (tmode);
32734 if (GET_MODE (op1) == SImode && mode1 == TImode)
32736 rtx x = gen_reg_rtx (V4SImode);
32737 emit_insn (gen_sse2_loadd (x, op1));
32738 op1 = gen_lowpart (TImode, x);
32741 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32742 op0 = copy_to_mode_reg (mode0, op0);
32743 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32744 op1 = copy_to_mode_reg (mode1, op1);
32746 pat = GEN_FCN (icode) (target, op0, op1);
32747 if (! pat)
32748 return 0;
32750 emit_insn (pat);
32752 return target;
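/* Illustrative sketch, not part of this file: a typical two-operand builtin
   that ends up in this helper, written at the intrinsic level.  In this GCC
   version _mm_add_pd wraps __builtin_ia32_addpd, a V2DF_FTYPE_V2DF_V2DF
   builtin that ix86_expand_args_builtin hands to ix86_expand_binop_builtin.  */

#include <emmintrin.h>

static __m128d
add_pairs (__m128d a, __m128d b)
{
  return _mm_add_pd (a, b);
}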
32755 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32757 static rtx
32758 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32759 enum ix86_builtin_func_type m_type,
32760 enum rtx_code sub_code)
32762 rtx pat;
32763 int i;
32764 int nargs;
32765 bool comparison_p = false;
32766 bool tf_p = false;
32767 bool last_arg_constant = false;
32768 int num_memory = 0;
32769 struct {
32770 rtx op;
32771 enum machine_mode mode;
32772 } args[4];
32774 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32776 switch (m_type)
32778 case MULTI_ARG_4_DF2_DI_I:
32779 case MULTI_ARG_4_DF2_DI_I1:
32780 case MULTI_ARG_4_SF2_SI_I:
32781 case MULTI_ARG_4_SF2_SI_I1:
32782 nargs = 4;
32783 last_arg_constant = true;
32784 break;
32786 case MULTI_ARG_3_SF:
32787 case MULTI_ARG_3_DF:
32788 case MULTI_ARG_3_SF2:
32789 case MULTI_ARG_3_DF2:
32790 case MULTI_ARG_3_DI:
32791 case MULTI_ARG_3_SI:
32792 case MULTI_ARG_3_SI_DI:
32793 case MULTI_ARG_3_HI:
32794 case MULTI_ARG_3_HI_SI:
32795 case MULTI_ARG_3_QI:
32796 case MULTI_ARG_3_DI2:
32797 case MULTI_ARG_3_SI2:
32798 case MULTI_ARG_3_HI2:
32799 case MULTI_ARG_3_QI2:
32800 nargs = 3;
32801 break;
32803 case MULTI_ARG_2_SF:
32804 case MULTI_ARG_2_DF:
32805 case MULTI_ARG_2_DI:
32806 case MULTI_ARG_2_SI:
32807 case MULTI_ARG_2_HI:
32808 case MULTI_ARG_2_QI:
32809 nargs = 2;
32810 break;
32812 case MULTI_ARG_2_DI_IMM:
32813 case MULTI_ARG_2_SI_IMM:
32814 case MULTI_ARG_2_HI_IMM:
32815 case MULTI_ARG_2_QI_IMM:
32816 nargs = 2;
32817 last_arg_constant = true;
32818 break;
32820 case MULTI_ARG_1_SF:
32821 case MULTI_ARG_1_DF:
32822 case MULTI_ARG_1_SF2:
32823 case MULTI_ARG_1_DF2:
32824 case MULTI_ARG_1_DI:
32825 case MULTI_ARG_1_SI:
32826 case MULTI_ARG_1_HI:
32827 case MULTI_ARG_1_QI:
32828 case MULTI_ARG_1_SI_DI:
32829 case MULTI_ARG_1_HI_DI:
32830 case MULTI_ARG_1_HI_SI:
32831 case MULTI_ARG_1_QI_DI:
32832 case MULTI_ARG_1_QI_SI:
32833 case MULTI_ARG_1_QI_HI:
32834 nargs = 1;
32835 break;
32837 case MULTI_ARG_2_DI_CMP:
32838 case MULTI_ARG_2_SI_CMP:
32839 case MULTI_ARG_2_HI_CMP:
32840 case MULTI_ARG_2_QI_CMP:
32841 nargs = 2;
32842 comparison_p = true;
32843 break;
32845 case MULTI_ARG_2_SF_TF:
32846 case MULTI_ARG_2_DF_TF:
32847 case MULTI_ARG_2_DI_TF:
32848 case MULTI_ARG_2_SI_TF:
32849 case MULTI_ARG_2_HI_TF:
32850 case MULTI_ARG_2_QI_TF:
32851 nargs = 2;
32852 tf_p = true;
32853 break;
32855 default:
32856 gcc_unreachable ();
32859 if (optimize || !target
32860 || GET_MODE (target) != tmode
32861 || !insn_data[icode].operand[0].predicate (target, tmode))
32862 target = gen_reg_rtx (tmode);
32864 gcc_assert (nargs <= 4);
32866 for (i = 0; i < nargs; i++)
32868 tree arg = CALL_EXPR_ARG (exp, i);
32869 rtx op = expand_normal (arg);
32870 int adjust = (comparison_p) ? 1 : 0;
32871 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32873 if (last_arg_constant && i == nargs - 1)
32875 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32877 enum insn_code new_icode = icode;
32878 switch (icode)
32880 case CODE_FOR_xop_vpermil2v2df3:
32881 case CODE_FOR_xop_vpermil2v4sf3:
32882 case CODE_FOR_xop_vpermil2v4df3:
32883 case CODE_FOR_xop_vpermil2v8sf3:
32884 error ("the last argument must be a 2-bit immediate");
32885 return gen_reg_rtx (tmode);
32886 case CODE_FOR_xop_rotlv2di3:
32887 new_icode = CODE_FOR_rotlv2di3;
32888 goto xop_rotl;
32889 case CODE_FOR_xop_rotlv4si3:
32890 new_icode = CODE_FOR_rotlv4si3;
32891 goto xop_rotl;
32892 case CODE_FOR_xop_rotlv8hi3:
32893 new_icode = CODE_FOR_rotlv8hi3;
32894 goto xop_rotl;
32895 case CODE_FOR_xop_rotlv16qi3:
32896 new_icode = CODE_FOR_rotlv16qi3;
32897 xop_rotl:
32898 if (CONST_INT_P (op))
32900 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32901 op = GEN_INT (INTVAL (op) & mask);
32902 gcc_checking_assert
32903 (insn_data[icode].operand[i + 1].predicate (op, mode));
32905 else
32907 gcc_checking_assert
32908 (nargs == 2
32909 && insn_data[new_icode].operand[0].mode == tmode
32910 && insn_data[new_icode].operand[1].mode == tmode
32911 && insn_data[new_icode].operand[2].mode == mode
32912 && insn_data[new_icode].operand[0].predicate
32913 == insn_data[icode].operand[0].predicate
32914 && insn_data[new_icode].operand[1].predicate
32915 == insn_data[icode].operand[1].predicate);
32916 icode = new_icode;
32917 goto non_constant;
32919 break;
32920 default:
32921 gcc_unreachable ();
32925 else
32927 non_constant:
32928 if (VECTOR_MODE_P (mode))
32929 op = safe_vector_operand (op, mode);
32931 /* If we aren't optimizing, only allow one memory operand to be
32932 generated. */
32933 if (memory_operand (op, mode))
32934 num_memory++;
32936 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
32938 if (optimize
32939 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
32940 || num_memory > 1)
32941 op = force_reg (mode, op);
32944 args[i].op = op;
32945 args[i].mode = mode;
32948 switch (nargs)
32950 case 1:
32951 pat = GEN_FCN (icode) (target, args[0].op);
32952 break;
32954 case 2:
32955 if (tf_p)
32956 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
32957 GEN_INT ((int)sub_code));
32958 else if (! comparison_p)
32959 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
32960 else
32962 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
32963 args[0].op,
32964 args[1].op);
32966 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
32968 break;
32970 case 3:
32971 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
32972 break;
32974 case 4:
32975 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
32976 break;
32978 default:
32979 gcc_unreachable ();
32982 if (! pat)
32983 return 0;
32985 emit_insn (pat);
32986 return target;
32989 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
32990 insns with vec_merge. */
32992 static rtx
32993 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
32994 rtx target)
32996 rtx pat;
32997 tree arg0 = CALL_EXPR_ARG (exp, 0);
32998 rtx op1, op0 = expand_normal (arg0);
32999 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33000 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33002 if (optimize || !target
33003 || GET_MODE (target) != tmode
33004 || !insn_data[icode].operand[0].predicate (target, tmode))
33005 target = gen_reg_rtx (tmode);
33007 if (VECTOR_MODE_P (mode0))
33008 op0 = safe_vector_operand (op0, mode0);
33010 if ((optimize && !register_operand (op0, mode0))
33011 || !insn_data[icode].operand[1].predicate (op0, mode0))
33012 op0 = copy_to_mode_reg (mode0, op0);
33014 op1 = op0;
33015 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33016 op1 = copy_to_mode_reg (mode0, op1);
33018 pat = GEN_FCN (icode) (target, op0, op1);
33019 if (! pat)
33020 return 0;
33021 emit_insn (pat);
33022 return target;
33025 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33027 static rtx
33028 ix86_expand_sse_compare (const struct builtin_description *d,
33029 tree exp, rtx target, bool swap)
33031 rtx pat;
33032 tree arg0 = CALL_EXPR_ARG (exp, 0);
33033 tree arg1 = CALL_EXPR_ARG (exp, 1);
33034 rtx op0 = expand_normal (arg0);
33035 rtx op1 = expand_normal (arg1);
33036 rtx op2;
33037 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33038 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33039 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33040 enum rtx_code comparison = d->comparison;
33042 if (VECTOR_MODE_P (mode0))
33043 op0 = safe_vector_operand (op0, mode0);
33044 if (VECTOR_MODE_P (mode1))
33045 op1 = safe_vector_operand (op1, mode1);
33047 /* Swap operands if we have a comparison that isn't available in
33048 hardware. */
33049 if (swap)
33051 rtx tmp = gen_reg_rtx (mode1);
33052 emit_move_insn (tmp, op1);
33053 op1 = op0;
33054 op0 = tmp;
33057 if (optimize || !target
33058 || GET_MODE (target) != tmode
33059 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33060 target = gen_reg_rtx (tmode);
33062 if ((optimize && !register_operand (op0, mode0))
33063 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33064 op0 = copy_to_mode_reg (mode0, op0);
33065 if ((optimize && !register_operand (op1, mode1))
33066 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33067 op1 = copy_to_mode_reg (mode1, op1);
33069 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33070 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33071 if (! pat)
33072 return 0;
33073 emit_insn (pat);
33074 return target;
33077 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33079 static rtx
33080 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33081 rtx target)
33083 rtx pat;
33084 tree arg0 = CALL_EXPR_ARG (exp, 0);
33085 tree arg1 = CALL_EXPR_ARG (exp, 1);
33086 rtx op0 = expand_normal (arg0);
33087 rtx op1 = expand_normal (arg1);
33088 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33089 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33090 enum rtx_code comparison = d->comparison;
33092 if (VECTOR_MODE_P (mode0))
33093 op0 = safe_vector_operand (op0, mode0);
33094 if (VECTOR_MODE_P (mode1))
33095 op1 = safe_vector_operand (op1, mode1);
33097 /* Swap operands if we have a comparison that isn't available in
33098 hardware. */
33099 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33101 rtx tmp = op1;
33102 op1 = op0;
33103 op0 = tmp;
33106 target = gen_reg_rtx (SImode);
33107 emit_move_insn (target, const0_rtx);
33108 target = gen_rtx_SUBREG (QImode, target, 0);
33110 if ((optimize && !register_operand (op0, mode0))
33111 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33112 op0 = copy_to_mode_reg (mode0, op0);
33113 if ((optimize && !register_operand (op1, mode1))
33114 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33115 op1 = copy_to_mode_reg (mode1, op1);
33117 pat = GEN_FCN (d->icode) (op0, op1);
33118 if (! pat)
33119 return 0;
33120 emit_insn (pat);
33121 emit_insn (gen_rtx_SET (VOIDmode,
33122 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33123 gen_rtx_fmt_ee (comparison, QImode,
33124 SET_DEST (pat),
33125 const0_rtx)));
33127 return SUBREG_REG (target);
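/* Illustrative sketch, not part of this file: a user-level entry point that
   reaches this helper.  _mm_comieq_sd wraps __builtin_ia32_comisdeq, one of
   the comi builtins, which is expanded here into COMISD followed by a SETcc
   of the resulting flags.  */

#include <emmintrin.h>

static int
low_doubles_equal (__m128d a, __m128d b)
{
  return _mm_comieq_sd (a, b);
}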
33130 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33132 static rtx
33133 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33134 rtx target)
33136 rtx pat;
33137 tree arg0 = CALL_EXPR_ARG (exp, 0);
33138 rtx op1, op0 = expand_normal (arg0);
33139 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33140 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33142 if (optimize || target == 0
33143 || GET_MODE (target) != tmode
33144 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33145 target = gen_reg_rtx (tmode);
33147 if (VECTOR_MODE_P (mode0))
33148 op0 = safe_vector_operand (op0, mode0);
33150 if ((optimize && !register_operand (op0, mode0))
33151 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33152 op0 = copy_to_mode_reg (mode0, op0);
33154 op1 = GEN_INT (d->comparison);
33156 pat = GEN_FCN (d->icode) (target, op0, op1);
33157 if (! pat)
33158 return 0;
33159 emit_insn (pat);
33160 return target;
33163 static rtx
33164 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33165 tree exp, rtx target)
33167 rtx pat;
33168 tree arg0 = CALL_EXPR_ARG (exp, 0);
33169 tree arg1 = CALL_EXPR_ARG (exp, 1);
33170 rtx op0 = expand_normal (arg0);
33171 rtx op1 = expand_normal (arg1);
33172 rtx op2;
33173 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33174 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33175 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33177 if (optimize || target == 0
33178 || GET_MODE (target) != tmode
33179 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33180 target = gen_reg_rtx (tmode);
33182 op0 = safe_vector_operand (op0, mode0);
33183 op1 = safe_vector_operand (op1, mode1);
33185 if ((optimize && !register_operand (op0, mode0))
33186 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33187 op0 = copy_to_mode_reg (mode0, op0);
33188 if ((optimize && !register_operand (op1, mode1))
33189 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33190 op1 = copy_to_mode_reg (mode1, op1);
33192 op2 = GEN_INT (d->comparison);
33194 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33195 if (! pat)
33196 return 0;
33197 emit_insn (pat);
33198 return target;
33201 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33203 static rtx
33204 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33205 rtx target)
33207 rtx pat;
33208 tree arg0 = CALL_EXPR_ARG (exp, 0);
33209 tree arg1 = CALL_EXPR_ARG (exp, 1);
33210 rtx op0 = expand_normal (arg0);
33211 rtx op1 = expand_normal (arg1);
33212 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33213 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33214 enum rtx_code comparison = d->comparison;
33216 if (VECTOR_MODE_P (mode0))
33217 op0 = safe_vector_operand (op0, mode0);
33218 if (VECTOR_MODE_P (mode1))
33219 op1 = safe_vector_operand (op1, mode1);
33221 target = gen_reg_rtx (SImode);
33222 emit_move_insn (target, const0_rtx);
33223 target = gen_rtx_SUBREG (QImode, target, 0);
33225 if ((optimize && !register_operand (op0, mode0))
33226 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33227 op0 = copy_to_mode_reg (mode0, op0);
33228 if ((optimize && !register_operand (op1, mode1))
33229 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33230 op1 = copy_to_mode_reg (mode1, op1);
33232 pat = GEN_FCN (d->icode) (op0, op1);
33233 if (! pat)
33234 return 0;
33235 emit_insn (pat);
33236 emit_insn (gen_rtx_SET (VOIDmode,
33237 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33238 gen_rtx_fmt_ee (comparison, QImode,
33239 SET_DEST (pat),
33240 const0_rtx)));
33242 return SUBREG_REG (target);
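/* Illustrative sketch, not part of this file: _mm_testz_si128 wraps
   __builtin_ia32_ptestz128, an INT_FTYPE_V2DI_V2DI_PTEST builtin that is
   expanded here into PTEST plus a SETcc of the flags result.  */

#include <smmintrin.h>

static int
all_bits_clear (__m128i v)
{
  return _mm_testz_si128 (v, v);   /* 1 iff v AND v is zero, i.e. v == 0 */
}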
33245 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33247 static rtx
33248 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33249 tree exp, rtx target)
33251 rtx pat;
33252 tree arg0 = CALL_EXPR_ARG (exp, 0);
33253 tree arg1 = CALL_EXPR_ARG (exp, 1);
33254 tree arg2 = CALL_EXPR_ARG (exp, 2);
33255 tree arg3 = CALL_EXPR_ARG (exp, 3);
33256 tree arg4 = CALL_EXPR_ARG (exp, 4);
33257 rtx scratch0, scratch1;
33258 rtx op0 = expand_normal (arg0);
33259 rtx op1 = expand_normal (arg1);
33260 rtx op2 = expand_normal (arg2);
33261 rtx op3 = expand_normal (arg3);
33262 rtx op4 = expand_normal (arg4);
33263 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33265 tmode0 = insn_data[d->icode].operand[0].mode;
33266 tmode1 = insn_data[d->icode].operand[1].mode;
33267 modev2 = insn_data[d->icode].operand[2].mode;
33268 modei3 = insn_data[d->icode].operand[3].mode;
33269 modev4 = insn_data[d->icode].operand[4].mode;
33270 modei5 = insn_data[d->icode].operand[5].mode;
33271 modeimm = insn_data[d->icode].operand[6].mode;
33273 if (VECTOR_MODE_P (modev2))
33274 op0 = safe_vector_operand (op0, modev2);
33275 if (VECTOR_MODE_P (modev4))
33276 op2 = safe_vector_operand (op2, modev4);
33278 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33279 op0 = copy_to_mode_reg (modev2, op0);
33280 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33281 op1 = copy_to_mode_reg (modei3, op1);
33282 if ((optimize && !register_operand (op2, modev4))
33283 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33284 op2 = copy_to_mode_reg (modev4, op2);
33285 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33286 op3 = copy_to_mode_reg (modei5, op3);
33288 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33290 error ("the fifth argument must be an 8-bit immediate");
33291 return const0_rtx;
33294 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33296 if (optimize || !target
33297 || GET_MODE (target) != tmode0
33298 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33299 target = gen_reg_rtx (tmode0);
33301 scratch1 = gen_reg_rtx (tmode1);
33303 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33305 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33307 if (optimize || !target
33308 || GET_MODE (target) != tmode1
33309 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33310 target = gen_reg_rtx (tmode1);
33312 scratch0 = gen_reg_rtx (tmode0);
33314 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33316 else
33318 gcc_assert (d->flag);
33320 scratch0 = gen_reg_rtx (tmode0);
33321 scratch1 = gen_reg_rtx (tmode1);
33323 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33326 if (! pat)
33327 return 0;
33329 emit_insn (pat);
33331 if (d->flag)
33333 target = gen_reg_rtx (SImode);
33334 emit_move_insn (target, const0_rtx);
33335 target = gen_rtx_SUBREG (QImode, target, 0);
33337 emit_insn
33338 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33339 gen_rtx_fmt_ee (EQ, QImode,
33340 gen_rtx_REG ((enum machine_mode) d->flag,
33341 FLAGS_REG),
33342 const0_rtx)));
33343 return SUBREG_REG (target);
33345 else
33346 return target;
33350 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33352 static rtx
33353 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33354 tree exp, rtx target)
33356 rtx pat;
33357 tree arg0 = CALL_EXPR_ARG (exp, 0);
33358 tree arg1 = CALL_EXPR_ARG (exp, 1);
33359 tree arg2 = CALL_EXPR_ARG (exp, 2);
33360 rtx scratch0, scratch1;
33361 rtx op0 = expand_normal (arg0);
33362 rtx op1 = expand_normal (arg1);
33363 rtx op2 = expand_normal (arg2);
33364 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33366 tmode0 = insn_data[d->icode].operand[0].mode;
33367 tmode1 = insn_data[d->icode].operand[1].mode;
33368 modev2 = insn_data[d->icode].operand[2].mode;
33369 modev3 = insn_data[d->icode].operand[3].mode;
33370 modeimm = insn_data[d->icode].operand[4].mode;
33372 if (VECTOR_MODE_P (modev2))
33373 op0 = safe_vector_operand (op0, modev2);
33374 if (VECTOR_MODE_P (modev3))
33375 op1 = safe_vector_operand (op1, modev3);
33377 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33378 op0 = copy_to_mode_reg (modev2, op0);
33379 if ((optimize && !register_operand (op1, modev3))
33380 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33381 op1 = copy_to_mode_reg (modev3, op1);
33383 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33385 error ("the third argument must be an 8-bit immediate");
33386 return const0_rtx;
33389 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33391 if (optimize || !target
33392 || GET_MODE (target) != tmode0
33393 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33394 target = gen_reg_rtx (tmode0);
33396 scratch1 = gen_reg_rtx (tmode1);
33398 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33400 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33402 if (optimize || !target
33403 || GET_MODE (target) != tmode1
33404 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33405 target = gen_reg_rtx (tmode1);
33407 scratch0 = gen_reg_rtx (tmode0);
33409 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33411 else
33413 gcc_assert (d->flag);
33415 scratch0 = gen_reg_rtx (tmode0);
33416 scratch1 = gen_reg_rtx (tmode1);
33418 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33421 if (! pat)
33422 return 0;
33424 emit_insn (pat);
33426 if (d->flag)
33428 target = gen_reg_rtx (SImode);
33429 emit_move_insn (target, const0_rtx);
33430 target = gen_rtx_SUBREG (QImode, target, 0);
33432 emit_insn
33433 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33434 gen_rtx_fmt_ee (EQ, QImode,
33435 gen_rtx_REG ((enum machine_mode) d->flag,
33436 FLAGS_REG),
33437 const0_rtx)));
33438 return SUBREG_REG (target);
33440 else
33441 return target;
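/* Illustrative sketch, not part of this file: an intrinsic that reaches
   this helper.  _mm_cmpistri wraps __builtin_ia32_pcmpistri128; its control
   word must be a compile-time 8-bit immediate, which is exactly what the
   diagnostic above enforces.  */

#include <nmmintrin.h>

static int
index_of_first_space (__m128i text)
{
  const __m128i set = _mm_set1_epi8 (' ');
  return _mm_cmpistri (set, text, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
}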
33444 /* Subroutine of ix86_expand_builtin to take care of insns with
33445 variable number of operands. */
33447 static rtx
33448 ix86_expand_args_builtin (const struct builtin_description *d,
33449 tree exp, rtx target)
33451 rtx pat, real_target;
33452 unsigned int i, nargs;
33453 unsigned int nargs_constant = 0;
33454 unsigned int mask_pos = 0;
33455 int num_memory = 0;
33456 struct
33458 rtx op;
33459 enum machine_mode mode;
33460 } args[6];
33461 bool last_arg_count = false;
33462 enum insn_code icode = d->icode;
33463 const struct insn_data_d *insn_p = &insn_data[icode];
33464 enum machine_mode tmode = insn_p->operand[0].mode;
33465 enum machine_mode rmode = VOIDmode;
33466 bool swap = false;
33467 enum rtx_code comparison = d->comparison;
33469 switch ((enum ix86_builtin_func_type) d->flag)
33471 case V2DF_FTYPE_V2DF_ROUND:
33472 case V4DF_FTYPE_V4DF_ROUND:
33473 case V4SF_FTYPE_V4SF_ROUND:
33474 case V8SF_FTYPE_V8SF_ROUND:
33475 case V4SI_FTYPE_V4SF_ROUND:
33476 case V8SI_FTYPE_V8SF_ROUND:
33477 return ix86_expand_sse_round (d, exp, target);
33478 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33479 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33480 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33481 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33482 case INT_FTYPE_V8SF_V8SF_PTEST:
33483 case INT_FTYPE_V4DI_V4DI_PTEST:
33484 case INT_FTYPE_V4DF_V4DF_PTEST:
33485 case INT_FTYPE_V4SF_V4SF_PTEST:
33486 case INT_FTYPE_V2DI_V2DI_PTEST:
33487 case INT_FTYPE_V2DF_V2DF_PTEST:
33488 return ix86_expand_sse_ptest (d, exp, target);
33489 case FLOAT128_FTYPE_FLOAT128:
33490 case FLOAT_FTYPE_FLOAT:
33491 case INT_FTYPE_INT:
33492 case UINT64_FTYPE_INT:
33493 case UINT16_FTYPE_UINT16:
33494 case INT64_FTYPE_INT64:
33495 case INT64_FTYPE_V4SF:
33496 case INT64_FTYPE_V2DF:
33497 case INT_FTYPE_V16QI:
33498 case INT_FTYPE_V8QI:
33499 case INT_FTYPE_V8SF:
33500 case INT_FTYPE_V4DF:
33501 case INT_FTYPE_V4SF:
33502 case INT_FTYPE_V2DF:
33503 case INT_FTYPE_V32QI:
33504 case V16QI_FTYPE_V16QI:
33505 case V8SI_FTYPE_V8SF:
33506 case V8SI_FTYPE_V4SI:
33507 case V8HI_FTYPE_V8HI:
33508 case V8HI_FTYPE_V16QI:
33509 case V8QI_FTYPE_V8QI:
33510 case V8SF_FTYPE_V8SF:
33511 case V8SF_FTYPE_V8SI:
33512 case V8SF_FTYPE_V4SF:
33513 case V8SF_FTYPE_V8HI:
33514 case V4SI_FTYPE_V4SI:
33515 case V4SI_FTYPE_V16QI:
33516 case V4SI_FTYPE_V4SF:
33517 case V4SI_FTYPE_V8SI:
33518 case V4SI_FTYPE_V8HI:
33519 case V4SI_FTYPE_V4DF:
33520 case V4SI_FTYPE_V2DF:
33521 case V4HI_FTYPE_V4HI:
33522 case V4DF_FTYPE_V4DF:
33523 case V4DF_FTYPE_V4SI:
33524 case V4DF_FTYPE_V4SF:
33525 case V4DF_FTYPE_V2DF:
33526 case V4SF_FTYPE_V4SF:
33527 case V4SF_FTYPE_V4SI:
33528 case V4SF_FTYPE_V8SF:
33529 case V4SF_FTYPE_V4DF:
33530 case V4SF_FTYPE_V8HI:
33531 case V4SF_FTYPE_V2DF:
33532 case V2DI_FTYPE_V2DI:
33533 case V2DI_FTYPE_V16QI:
33534 case V2DI_FTYPE_V8HI:
33535 case V2DI_FTYPE_V4SI:
33536 case V2DF_FTYPE_V2DF:
33537 case V2DF_FTYPE_V4SI:
33538 case V2DF_FTYPE_V4DF:
33539 case V2DF_FTYPE_V4SF:
33540 case V2DF_FTYPE_V2SI:
33541 case V2SI_FTYPE_V2SI:
33542 case V2SI_FTYPE_V4SF:
33543 case V2SI_FTYPE_V2SF:
33544 case V2SI_FTYPE_V2DF:
33545 case V2SF_FTYPE_V2SF:
33546 case V2SF_FTYPE_V2SI:
33547 case V32QI_FTYPE_V32QI:
33548 case V32QI_FTYPE_V16QI:
33549 case V16HI_FTYPE_V16HI:
33550 case V16HI_FTYPE_V8HI:
33551 case V8SI_FTYPE_V8SI:
33552 case V16HI_FTYPE_V16QI:
33553 case V8SI_FTYPE_V16QI:
33554 case V4DI_FTYPE_V16QI:
33555 case V8SI_FTYPE_V8HI:
33556 case V4DI_FTYPE_V8HI:
33557 case V4DI_FTYPE_V4SI:
33558 case V4DI_FTYPE_V2DI:
33559 case HI_FTYPE_HI:
33560 case UINT_FTYPE_V2DF:
33561 case UINT_FTYPE_V4SF:
33562 case UINT64_FTYPE_V2DF:
33563 case UINT64_FTYPE_V4SF:
33564 case V16QI_FTYPE_V8DI:
33565 case V16HI_FTYPE_V16SI:
33566 case V16SI_FTYPE_HI:
33567 case V16SI_FTYPE_V16SI:
33568 case V16SI_FTYPE_INT:
33569 case V16SF_FTYPE_FLOAT:
33570 case V16SF_FTYPE_V4SF:
33571 case V16SF_FTYPE_V16SF:
33572 case V8HI_FTYPE_V8DI:
33573 case V8UHI_FTYPE_V8UHI:
33574 case V8SI_FTYPE_V8DI:
33575 case V8USI_FTYPE_V8USI:
33576 case V8SF_FTYPE_V8DF:
33577 case V8DI_FTYPE_QI:
33578 case V8DI_FTYPE_INT64:
33579 case V8DI_FTYPE_V4DI:
33580 case V8DI_FTYPE_V8DI:
33581 case V8DF_FTYPE_DOUBLE:
33582 case V8DF_FTYPE_V4DF:
33583 case V8DF_FTYPE_V8DF:
33584 case V8DF_FTYPE_V8SI:
33585 nargs = 1;
33586 break;
33587 case V4SF_FTYPE_V4SF_VEC_MERGE:
33588 case V2DF_FTYPE_V2DF_VEC_MERGE:
33589 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33590 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33591 case V16QI_FTYPE_V16QI_V16QI:
33592 case V16QI_FTYPE_V8HI_V8HI:
33593 case V16SI_FTYPE_V16SI_V16SI:
33594 case V16SF_FTYPE_V16SF_V16SF:
33595 case V16SF_FTYPE_V16SF_V16SI:
33596 case V8QI_FTYPE_V8QI_V8QI:
33597 case V8QI_FTYPE_V4HI_V4HI:
33598 case V8HI_FTYPE_V8HI_V8HI:
33599 case V8HI_FTYPE_V16QI_V16QI:
33600 case V8HI_FTYPE_V4SI_V4SI:
33601 case V8SF_FTYPE_V8SF_V8SF:
33602 case V8SF_FTYPE_V8SF_V8SI:
33603 case V8DI_FTYPE_V8DI_V8DI:
33604 case V8DF_FTYPE_V8DF_V8DF:
33605 case V8DF_FTYPE_V8DF_V8DI:
33606 case V4SI_FTYPE_V4SI_V4SI:
33607 case V4SI_FTYPE_V8HI_V8HI:
33608 case V4SI_FTYPE_V4SF_V4SF:
33609 case V4SI_FTYPE_V2DF_V2DF:
33610 case V4HI_FTYPE_V4HI_V4HI:
33611 case V4HI_FTYPE_V8QI_V8QI:
33612 case V4HI_FTYPE_V2SI_V2SI:
33613 case V4DF_FTYPE_V4DF_V4DF:
33614 case V4DF_FTYPE_V4DF_V4DI:
33615 case V4SF_FTYPE_V4SF_V4SF:
33616 case V4SF_FTYPE_V4SF_V4SI:
33617 case V4SF_FTYPE_V4SF_V2SI:
33618 case V4SF_FTYPE_V4SF_V2DF:
33619 case V4SF_FTYPE_V4SF_UINT:
33620 case V4SF_FTYPE_V4SF_UINT64:
33621 case V4SF_FTYPE_V4SF_DI:
33622 case V4SF_FTYPE_V4SF_SI:
33623 case V2DI_FTYPE_V2DI_V2DI:
33624 case V2DI_FTYPE_V16QI_V16QI:
33625 case V2DI_FTYPE_V4SI_V4SI:
33626 case V2UDI_FTYPE_V4USI_V4USI:
33627 case V2DI_FTYPE_V2DI_V16QI:
33628 case V2DI_FTYPE_V2DF_V2DF:
33629 case V2SI_FTYPE_V2SI_V2SI:
33630 case V2SI_FTYPE_V4HI_V4HI:
33631 case V2SI_FTYPE_V2SF_V2SF:
33632 case V2DF_FTYPE_V2DF_V2DF:
33633 case V2DF_FTYPE_V2DF_V4SF:
33634 case V2DF_FTYPE_V2DF_V2DI:
33635 case V2DF_FTYPE_V2DF_DI:
33636 case V2DF_FTYPE_V2DF_SI:
33637 case V2DF_FTYPE_V2DF_UINT:
33638 case V2DF_FTYPE_V2DF_UINT64:
33639 case V2SF_FTYPE_V2SF_V2SF:
33640 case V1DI_FTYPE_V1DI_V1DI:
33641 case V1DI_FTYPE_V8QI_V8QI:
33642 case V1DI_FTYPE_V2SI_V2SI:
33643 case V32QI_FTYPE_V16HI_V16HI:
33644 case V16HI_FTYPE_V8SI_V8SI:
33645 case V32QI_FTYPE_V32QI_V32QI:
33646 case V16HI_FTYPE_V32QI_V32QI:
33647 case V16HI_FTYPE_V16HI_V16HI:
33648 case V8SI_FTYPE_V4DF_V4DF:
33649 case V8SI_FTYPE_V8SI_V8SI:
33650 case V8SI_FTYPE_V16HI_V16HI:
33651 case V4DI_FTYPE_V4DI_V4DI:
33652 case V4DI_FTYPE_V8SI_V8SI:
33653 case V4UDI_FTYPE_V8USI_V8USI:
33654 case QI_FTYPE_V8DI_V8DI:
33655 case HI_FTYPE_V16SI_V16SI:
33656 if (comparison == UNKNOWN)
33657 return ix86_expand_binop_builtin (icode, exp, target);
33658 nargs = 2;
33659 break;
33660 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33661 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33662 gcc_assert (comparison != UNKNOWN);
33663 nargs = 2;
33664 swap = true;
33665 break;
33666 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33667 case V16HI_FTYPE_V16HI_SI_COUNT:
33668 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33669 case V8SI_FTYPE_V8SI_SI_COUNT:
33670 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33671 case V4DI_FTYPE_V4DI_INT_COUNT:
33672 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33673 case V8HI_FTYPE_V8HI_SI_COUNT:
33674 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33675 case V4SI_FTYPE_V4SI_SI_COUNT:
33676 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33677 case V4HI_FTYPE_V4HI_SI_COUNT:
33678 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33679 case V2DI_FTYPE_V2DI_SI_COUNT:
33680 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33681 case V2SI_FTYPE_V2SI_SI_COUNT:
33682 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33683 case V1DI_FTYPE_V1DI_SI_COUNT:
33684 nargs = 2;
33685 last_arg_count = true;
33686 break;
33687 case UINT64_FTYPE_UINT64_UINT64:
33688 case UINT_FTYPE_UINT_UINT:
33689 case UINT_FTYPE_UINT_USHORT:
33690 case UINT_FTYPE_UINT_UCHAR:
33691 case UINT16_FTYPE_UINT16_INT:
33692 case UINT8_FTYPE_UINT8_INT:
33693 case HI_FTYPE_HI_HI:
33694 case V16SI_FTYPE_V8DF_V8DF:
33695 nargs = 2;
33696 break;
33697 case V2DI_FTYPE_V2DI_INT_CONVERT:
33698 nargs = 2;
33699 rmode = V1TImode;
33700 nargs_constant = 1;
33701 break;
33702 case V4DI_FTYPE_V4DI_INT_CONVERT:
33703 nargs = 2;
33704 rmode = V2TImode;
33705 nargs_constant = 1;
33706 break;
33707 case V8HI_FTYPE_V8HI_INT:
33708 case V8HI_FTYPE_V8SF_INT:
33709 case V16HI_FTYPE_V16SF_INT:
33710 case V8HI_FTYPE_V4SF_INT:
33711 case V8SF_FTYPE_V8SF_INT:
33712 case V4SF_FTYPE_V16SF_INT:
33713 case V16SF_FTYPE_V16SF_INT:
33714 case V4SI_FTYPE_V4SI_INT:
33715 case V4SI_FTYPE_V8SI_INT:
33716 case V4HI_FTYPE_V4HI_INT:
33717 case V4DF_FTYPE_V4DF_INT:
33718 case V4DF_FTYPE_V8DF_INT:
33719 case V4SF_FTYPE_V4SF_INT:
33720 case V4SF_FTYPE_V8SF_INT:
33721 case V2DI_FTYPE_V2DI_INT:
33722 case V2DF_FTYPE_V2DF_INT:
33723 case V2DF_FTYPE_V4DF_INT:
33724 case V16HI_FTYPE_V16HI_INT:
33725 case V8SI_FTYPE_V8SI_INT:
33726 case V16SI_FTYPE_V16SI_INT:
33727 case V4SI_FTYPE_V16SI_INT:
33728 case V4DI_FTYPE_V4DI_INT:
33729 case V2DI_FTYPE_V4DI_INT:
33730 case V4DI_FTYPE_V8DI_INT:
33731 case HI_FTYPE_HI_INT:
33732 nargs = 2;
33733 nargs_constant = 1;
33734 break;
33735 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33736 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33737 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33738 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33739 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33740 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33741 case HI_FTYPE_V16SI_V16SI_HI:
33742 case QI_FTYPE_V8DI_V8DI_QI:
33743 case V16HI_FTYPE_V16SI_V16HI_HI:
33744 case V16QI_FTYPE_V16SI_V16QI_HI:
33745 case V16QI_FTYPE_V8DI_V16QI_QI:
33746 case V16SF_FTYPE_V16SF_V16SF_HI:
33747 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33748 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33749 case V16SF_FTYPE_V16SI_V16SF_HI:
33750 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33751 case V16SF_FTYPE_V4SF_V16SF_HI:
33752 case V16SI_FTYPE_SI_V16SI_HI:
33753 case V16SI_FTYPE_V16HI_V16SI_HI:
33754 case V16SI_FTYPE_V16QI_V16SI_HI:
33755 case V16SI_FTYPE_V16SF_V16SI_HI:
33756 case V16SI_FTYPE_V16SI_V16SI_HI:
33757 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33758 case V16SI_FTYPE_V4SI_V16SI_HI:
33759 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33760 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33761 case V8DF_FTYPE_V2DF_V8DF_QI:
33762 case V8DF_FTYPE_V4DF_V8DF_QI:
33763 case V8DF_FTYPE_V8DF_V8DF_QI:
33764 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33765 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33766 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33767 case V8DF_FTYPE_V8SF_V8DF_QI:
33768 case V8DF_FTYPE_V8SI_V8DF_QI:
33769 case V8DI_FTYPE_DI_V8DI_QI:
33770 case V8DI_FTYPE_V16QI_V8DI_QI:
33771 case V8DI_FTYPE_V2DI_V8DI_QI:
33772 case V8DI_FTYPE_V4DI_V8DI_QI:
33773 case V8DI_FTYPE_V8DI_V8DI_QI:
33774 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33775 case V8DI_FTYPE_V8HI_V8DI_QI:
33776 case V8DI_FTYPE_V8SI_V8DI_QI:
33777 case V8HI_FTYPE_V8DI_V8HI_QI:
33778 case V8SF_FTYPE_V8DF_V8SF_QI:
33779 case V8SI_FTYPE_V8DF_V8SI_QI:
33780 case V8SI_FTYPE_V8DI_V8SI_QI:
33781 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33782 nargs = 3;
33783 break;
33784 case V32QI_FTYPE_V32QI_V32QI_INT:
33785 case V16HI_FTYPE_V16HI_V16HI_INT:
33786 case V16QI_FTYPE_V16QI_V16QI_INT:
33787 case V4DI_FTYPE_V4DI_V4DI_INT:
33788 case V8HI_FTYPE_V8HI_V8HI_INT:
33789 case V8SI_FTYPE_V8SI_V8SI_INT:
33790 case V8SI_FTYPE_V8SI_V4SI_INT:
33791 case V8SF_FTYPE_V8SF_V8SF_INT:
33792 case V8SF_FTYPE_V8SF_V4SF_INT:
33793 case V4SI_FTYPE_V4SI_V4SI_INT:
33794 case V4DF_FTYPE_V4DF_V4DF_INT:
33795 case V16SF_FTYPE_V16SF_V16SF_INT:
33796 case V16SF_FTYPE_V16SF_V4SF_INT:
33797 case V16SI_FTYPE_V16SI_V4SI_INT:
33798 case V4DF_FTYPE_V4DF_V2DF_INT:
33799 case V4SF_FTYPE_V4SF_V4SF_INT:
33800 case V2DI_FTYPE_V2DI_V2DI_INT:
33801 case V4DI_FTYPE_V4DI_V2DI_INT:
33802 case V2DF_FTYPE_V2DF_V2DF_INT:
33803 case QI_FTYPE_V8DI_V8DI_INT:
33804 case QI_FTYPE_V8DF_V8DF_INT:
33805 case QI_FTYPE_V2DF_V2DF_INT:
33806 case QI_FTYPE_V4SF_V4SF_INT:
33807 case HI_FTYPE_V16SI_V16SI_INT:
33808 case HI_FTYPE_V16SF_V16SF_INT:
33809 nargs = 3;
33810 nargs_constant = 1;
33811 break;
33812 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33813 nargs = 3;
33814 rmode = V4DImode;
33815 nargs_constant = 1;
33816 break;
33817 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33818 nargs = 3;
33819 rmode = V2DImode;
33820 nargs_constant = 1;
33821 break;
33822 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33823 nargs = 3;
33824 rmode = DImode;
33825 nargs_constant = 1;
33826 break;
33827 case V2DI_FTYPE_V2DI_UINT_UINT:
33828 nargs = 3;
33829 nargs_constant = 2;
33830 break;
33831 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33832 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33833 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33834 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33835 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33836 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33837 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33838 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33839 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33840 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33841 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33842 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33843 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33844 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33845 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33846 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33847 nargs = 4;
33848 break;
33849 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33850 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33851 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33852 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33853 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33854 nargs = 4;
33855 nargs_constant = 1;
33856 break;
33857 case QI_FTYPE_V2DF_V2DF_INT_QI:
33858 case QI_FTYPE_V4SF_V4SF_INT_QI:
33859 nargs = 4;
33860 mask_pos = 1;
33861 nargs_constant = 1;
33862 break;
33863 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33864 nargs = 4;
33865 nargs_constant = 2;
33866 break;
33867 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33868 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33869 nargs = 4;
33870 break;
33871 case QI_FTYPE_V8DI_V8DI_INT_QI:
33872 case HI_FTYPE_V16SI_V16SI_INT_HI:
33873 case QI_FTYPE_V8DF_V8DF_INT_QI:
33874 case HI_FTYPE_V16SF_V16SF_INT_HI:
33875 mask_pos = 1;
33876 nargs = 4;
33877 nargs_constant = 1;
33878 break;
33879 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33880 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33881 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33882 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33883 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33884 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33885 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33886 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33887 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33888 nargs = 4;
33889 mask_pos = 2;
33890 nargs_constant = 1;
33891 break;
33892 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33893 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33894 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33895 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33896 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33897 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33898 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33899 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33900 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33901 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33902 nargs = 5;
33903 mask_pos = 2;
33904 nargs_constant = 1;
33905 break;
33906 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33907 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33908 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33909 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33910 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33911 nargs = 5;
33912 mask_pos = 1;
33913 nargs_constant = 1;
33914 break;
33916 default:
33917 gcc_unreachable ();
33920 gcc_assert (nargs <= ARRAY_SIZE (args));
33922 if (comparison != UNKNOWN)
33924 gcc_assert (nargs == 2);
33925 return ix86_expand_sse_compare (d, exp, target, swap);
33928 if (rmode == VOIDmode || rmode == tmode)
33930 if (optimize
33931 || target == 0
33932 || GET_MODE (target) != tmode
33933 || !insn_p->operand[0].predicate (target, tmode))
33934 target = gen_reg_rtx (tmode);
33935 real_target = target;
33937 else
33939 real_target = gen_reg_rtx (tmode);
33940 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
33943 for (i = 0; i < nargs; i++)
33945 tree arg = CALL_EXPR_ARG (exp, i);
33946 rtx op = expand_normal (arg);
33947 enum machine_mode mode = insn_p->operand[i + 1].mode;
33948 bool match = insn_p->operand[i + 1].predicate (op, mode);
33950 if (last_arg_count && (i + 1) == nargs)
33952 /* SIMD shift insns take either an 8-bit immediate or
33953 register as count. But builtin functions take int as
33954 count. If count doesn't match, we put it in register. */
33955 if (!match)
33957 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
33958 if (!insn_p->operand[i + 1].predicate (op, mode))
33959 op = copy_to_reg (op);
33962 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
33963 (!mask_pos && (nargs - i) <= nargs_constant))
33965 if (!match)
33966 switch (icode)
33968 case CODE_FOR_avx2_inserti128:
33969 case CODE_FOR_avx2_extracti128:
33970 error ("the last argument must be an 1-bit immediate");
33971 return const0_rtx;
33973 case CODE_FOR_avx512f_cmpv8di3_mask:
33974 case CODE_FOR_avx512f_cmpv16si3_mask:
33975 case CODE_FOR_avx512f_ucmpv8di3_mask:
33976 case CODE_FOR_avx512f_ucmpv16si3_mask:
33977 error ("the last argument must be a 3-bit immediate");
33978 return const0_rtx;
33980 case CODE_FOR_sse4_1_roundsd:
33981 case CODE_FOR_sse4_1_roundss:
33983 case CODE_FOR_sse4_1_roundpd:
33984 case CODE_FOR_sse4_1_roundps:
33985 case CODE_FOR_avx_roundpd256:
33986 case CODE_FOR_avx_roundps256:
33988 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
33989 case CODE_FOR_sse4_1_roundps_sfix:
33990 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
33991 case CODE_FOR_avx_roundps_sfix256:
33993 case CODE_FOR_sse4_1_blendps:
33994 case CODE_FOR_avx_blendpd256:
33995 case CODE_FOR_avx_vpermilv4df:
33996 case CODE_FOR_avx512f_getmantv8df_mask:
33997 case CODE_FOR_avx512f_getmantv16sf_mask:
33998 error ("the last argument must be a 4-bit immediate");
33999 return const0_rtx;
34001 case CODE_FOR_sha1rnds4:
34002 case CODE_FOR_sse4_1_blendpd:
34003 case CODE_FOR_avx_vpermilv2df:
34004 case CODE_FOR_xop_vpermil2v2df3:
34005 case CODE_FOR_xop_vpermil2v4sf3:
34006 case CODE_FOR_xop_vpermil2v4df3:
34007 case CODE_FOR_xop_vpermil2v8sf3:
34008 case CODE_FOR_avx512f_vinsertf32x4_mask:
34009 case CODE_FOR_avx512f_vinserti32x4_mask:
34010 case CODE_FOR_avx512f_vextractf32x4_mask:
34011 case CODE_FOR_avx512f_vextracti32x4_mask:
34012 error ("the last argument must be a 2-bit immediate");
34013 return const0_rtx;
34015 case CODE_FOR_avx_vextractf128v4df:
34016 case CODE_FOR_avx_vextractf128v8sf:
34017 case CODE_FOR_avx_vextractf128v8si:
34018 case CODE_FOR_avx_vinsertf128v4df:
34019 case CODE_FOR_avx_vinsertf128v8sf:
34020 case CODE_FOR_avx_vinsertf128v8si:
34021 case CODE_FOR_avx512f_vinsertf64x4_mask:
34022 case CODE_FOR_avx512f_vinserti64x4_mask:
34023 case CODE_FOR_avx512f_vextractf64x4_mask:
34024 case CODE_FOR_avx512f_vextracti64x4_mask:
34025 error ("the last argument must be a 1-bit immediate");
34026 return const0_rtx;
34028 case CODE_FOR_avx_vmcmpv2df3:
34029 case CODE_FOR_avx_vmcmpv4sf3:
34030 case CODE_FOR_avx_cmpv2df3:
34031 case CODE_FOR_avx_cmpv4sf3:
34032 case CODE_FOR_avx_cmpv4df3:
34033 case CODE_FOR_avx_cmpv8sf3:
34034 case CODE_FOR_avx512f_cmpv8df3_mask:
34035 case CODE_FOR_avx512f_cmpv16sf3_mask:
34036 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34037 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34038 error ("the last argument must be a 5-bit immediate");
34039 return const0_rtx;
34041 default:
34042 switch (nargs_constant)
34044 case 2:
34045 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34046 (!mask_pos && (nargs - i) == nargs_constant))
34048 error ("the next to last argument must be an 8-bit immediate");
34049 break;
34051 case 1:
34052 error ("the last argument must be an 8-bit immediate");
34053 break;
34054 default:
34055 gcc_unreachable ();
34057 return const0_rtx;
34060 else
34062 if (VECTOR_MODE_P (mode))
34063 op = safe_vector_operand (op, mode);
34065 /* If we aren't optimizing, only allow one memory operand to
34066 be generated. */
34067 if (memory_operand (op, mode))
34068 num_memory++;
34070 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34072 if (optimize || !match || num_memory > 1)
34073 op = copy_to_mode_reg (mode, op);
34075 else
34077 op = copy_to_reg (op);
34078 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34082 args[i].op = op;
34083 args[i].mode = mode;
34086 switch (nargs)
34088 case 1:
34089 pat = GEN_FCN (icode) (real_target, args[0].op);
34090 break;
34091 case 2:
34092 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34093 break;
34094 case 3:
34095 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34096 args[2].op);
34097 break;
34098 case 4:
34099 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34100 args[2].op, args[3].op);
34101 break;
34102 case 5:
34103 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34104 args[2].op, args[3].op, args[4].op);
break;
34105 case 6:
34106 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34107 args[2].op, args[3].op, args[4].op,
34108 args[5].op);
34109 break;
34110 default:
34111 gcc_unreachable ();
34114 if (! pat)
34115 return 0;
34117 emit_insn (pat);
34118 return target;
34121 /* Transform a pattern of the following layout:
34122 (parallel [
34123 set (A B)
34124 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
34126 into:
34127 (set (A B))
   or a pattern of the form:
34130 (parallel [ A B
   ...
34132 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
   ... ])
34135 into:
34136 (parallel [ A B ... ]) */
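/* Concretely (an illustrative sketch only; the register numbers and the
   V8DF mode are made up), a two-element body such as
     (parallel [(set (reg:V8DF 100)
		     (plus:V8DF (reg:V8DF 101) (reg:V8DF 102)))
		(unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
   is rewritten to the inner SET alone.  */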
34138 static rtx
34139 ix86_erase_embedded_rounding (rtx pat)
34141 if (GET_CODE (pat) == INSN)
34142 pat = PATTERN (pat);
34144 gcc_assert (GET_CODE (pat) == PARALLEL);
34146 if (XVECLEN (pat, 0) == 2)
34148 rtx p0 = XVECEXP (pat, 0, 0);
34149 rtx p1 = XVECEXP (pat, 0, 1);
34151 gcc_assert (GET_CODE (p0) == SET
34152 && GET_CODE (p1) == UNSPEC
34153 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34155 return p0;
34157 else
34159 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34160 int i = 0;
34161 int j = 0;
34163 for (; i < XVECLEN (pat, 0); ++i)
34165 rtx elem = XVECEXP (pat, 0, i);
34166 if (GET_CODE (elem) != UNSPEC
34167 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34168 res [j++] = elem;
34171 /* No more than 1 occurrence was removed. */
34172 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34174 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34178 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34179 with rounding. */
34180 static rtx
34181 ix86_expand_sse_comi_round (const struct builtin_description *d,
34182 tree exp, rtx target)
34184 rtx pat, set_dst;
34185 tree arg0 = CALL_EXPR_ARG (exp, 0);
34186 tree arg1 = CALL_EXPR_ARG (exp, 1);
34187 tree arg2 = CALL_EXPR_ARG (exp, 2);
34188 tree arg3 = CALL_EXPR_ARG (exp, 3);
34189 rtx op0 = expand_normal (arg0);
34190 rtx op1 = expand_normal (arg1);
34191 rtx op2 = expand_normal (arg2);
34192 rtx op3 = expand_normal (arg3);
34193 enum insn_code icode = d->icode;
34194 const struct insn_data_d *insn_p = &insn_data[icode];
34195 enum machine_mode mode0 = insn_p->operand[0].mode;
34196 enum machine_mode mode1 = insn_p->operand[1].mode;
34197 enum rtx_code comparison = UNEQ;
34198 bool need_ucomi = false;
34200 /* See avxintrin.h for values. */
34201 enum rtx_code comi_comparisons[32] =
34203 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34204 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34205 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34207 bool need_ucomi_values[32] =
34209 true, false, false, true, true, false, false, true,
34210 true, false, false, true, true, false, false, true,
34211 false, true, true, false, false, true, true, false,
34212 false, true, true, false, false, true, true, false
34215 if (!CONST_INT_P (op2))
34217 error ("the third argument must be a comparison constant");
34218 return const0_rtx;
34220 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34222 error ("incorrect comparison mode");
34223 return const0_rtx;
34226 if (!insn_p->operand[2].predicate (op3, SImode))
34228 error ("incorrect rounding operand");
34229 return const0_rtx;
34232 comparison = comi_comparisons[INTVAL (op2)];
34233 need_ucomi = need_ucomi_values[INTVAL (op2)];
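/* For illustration, assuming the predicate encoding of avxintrin.h
   (e.g. _CMP_EQ_OQ == 0): a third argument of 0 selects
   comi_comparisons[0] == UNEQ and need_ucomi_values[0] == true, so the
   quiet (ucomi) variant of the comparison instruction is emitted.  */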
34235 if (VECTOR_MODE_P (mode0))
34236 op0 = safe_vector_operand (op0, mode0);
34237 if (VECTOR_MODE_P (mode1))
34238 op1 = safe_vector_operand (op1, mode1);
34240 target = gen_reg_rtx (SImode);
34241 emit_move_insn (target, const0_rtx);
34242 target = gen_rtx_SUBREG (QImode, target, 0);
34244 if ((optimize && !register_operand (op0, mode0))
34245 || !insn_p->operand[0].predicate (op0, mode0))
34246 op0 = copy_to_mode_reg (mode0, op0);
34247 if ((optimize && !register_operand (op1, mode1))
34248 || !insn_p->operand[1].predicate (op1, mode1))
34249 op1 = copy_to_mode_reg (mode1, op1);
34251 if (need_ucomi)
34252 icode = icode == CODE_FOR_sse_comi_round
34253 ? CODE_FOR_sse_ucomi_round
34254 : CODE_FOR_sse2_ucomi_round;
34256 pat = GEN_FCN (icode) (op0, op1, op3);
34257 if (! pat)
34258 return 0;
34260 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34261 if (INTVAL (op3) == NO_ROUND)
34263 pat = ix86_erase_embedded_rounding (pat);
34264 if (! pat)
34265 return 0;
34267 set_dst = SET_DEST (pat);
34269 else
34271 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34272 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34275 emit_insn (pat);
34276 emit_insn (gen_rtx_SET (VOIDmode,
34277 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34278 gen_rtx_fmt_ee (comparison, QImode,
34279 set_dst,
34280 const0_rtx)));
34282 return SUBREG_REG (target);
34285 static rtx
34286 ix86_expand_round_builtin (const struct builtin_description *d,
34287 tree exp, rtx target)
34289 rtx pat;
34290 unsigned int i, nargs;
34291 struct
34293 rtx op;
34294 enum machine_mode mode;
34295 } args[6];
34296 enum insn_code icode = d->icode;
34297 const struct insn_data_d *insn_p = &insn_data[icode];
34298 enum machine_mode tmode = insn_p->operand[0].mode;
34299 unsigned int nargs_constant = 0;
34300 unsigned int redundant_embed_rnd = 0;
34302 switch ((enum ix86_builtin_func_type) d->flag)
34304 case UINT64_FTYPE_V2DF_INT:
34305 case UINT64_FTYPE_V4SF_INT:
34306 case UINT_FTYPE_V2DF_INT:
34307 case UINT_FTYPE_V4SF_INT:
34308 case INT64_FTYPE_V2DF_INT:
34309 case INT64_FTYPE_V4SF_INT:
34310 case INT_FTYPE_V2DF_INT:
34311 case INT_FTYPE_V4SF_INT:
34312 nargs = 2;
34313 break;
34314 case V4SF_FTYPE_V4SF_UINT_INT:
34315 case V4SF_FTYPE_V4SF_UINT64_INT:
34316 case V2DF_FTYPE_V2DF_UINT64_INT:
34317 case V4SF_FTYPE_V4SF_INT_INT:
34318 case V4SF_FTYPE_V4SF_INT64_INT:
34319 case V2DF_FTYPE_V2DF_INT64_INT:
34320 case V4SF_FTYPE_V4SF_V4SF_INT:
34321 case V2DF_FTYPE_V2DF_V2DF_INT:
34322 case V4SF_FTYPE_V4SF_V2DF_INT:
34323 case V2DF_FTYPE_V2DF_V4SF_INT:
34324 nargs = 3;
34325 break;
34326 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34327 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34328 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34329 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34330 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34331 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34332 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34333 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34334 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34335 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34336 nargs = 4;
34337 break;
34338 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34339 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34340 nargs_constant = 2;
34341 nargs = 4;
34342 break;
34343 case INT_FTYPE_V4SF_V4SF_INT_INT:
34344 case INT_FTYPE_V2DF_V2DF_INT_INT:
34345 return ix86_expand_sse_comi_round (d, exp, target);
34346 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34347 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34348 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34349 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34350 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34351 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34352 nargs = 5;
34353 break;
34354 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34355 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34356 nargs_constant = 4;
34357 nargs = 5;
34358 break;
34359 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34360 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34361 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34362 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34363 nargs_constant = 3;
34364 nargs = 5;
34365 break;
34366 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34367 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34368 nargs = 6;
34369 nargs_constant = 4;
34370 break;
34371 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34372 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34373 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34374 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34375 nargs = 6;
34376 nargs_constant = 3;
34377 break;
34378 default:
34379 gcc_unreachable ();
34381 gcc_assert (nargs <= ARRAY_SIZE (args));
34383 if (optimize
34384 || target == 0
34385 || GET_MODE (target) != tmode
34386 || !insn_p->operand[0].predicate (target, tmode))
34387 target = gen_reg_rtx (tmode);
34389 for (i = 0; i < nargs; i++)
34391 tree arg = CALL_EXPR_ARG (exp, i);
34392 rtx op = expand_normal (arg);
34393 enum machine_mode mode = insn_p->operand[i + 1].mode;
34394 bool match = insn_p->operand[i + 1].predicate (op, mode);
34396 if (i == nargs - nargs_constant)
34398 if (!match)
34400 switch (icode)
34402 case CODE_FOR_avx512f_getmantv8df_mask_round:
34403 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34404 case CODE_FOR_avx512f_getmantv2df_round:
34405 case CODE_FOR_avx512f_getmantv4sf_round:
34406 error ("the immediate argument must be a 4-bit immediate");
34407 return const0_rtx;
34408 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34409 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34410 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34411 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34412 error ("the immediate argument must be a 5-bit immediate");
34413 return const0_rtx;
34414 default:
34415 error ("the immediate argument must be an 8-bit immediate");
34416 return const0_rtx;
34420 else if (i == nargs-1)
34422 if (!insn_p->operand[nargs].predicate (op, SImode))
34424 error ("incorrect rounding operand");
34425 return const0_rtx;
34429 /* If there is no rounding, use the normal version of the pattern. */
34429 if (INTVAL (op) == NO_ROUND)
34430 redundant_embed_rnd = 1;
34432 else
34434 if (VECTOR_MODE_P (mode))
34435 op = safe_vector_operand (op, mode);
34437 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34439 if (optimize || !match)
34440 op = copy_to_mode_reg (mode, op);
34442 else
34444 op = copy_to_reg (op);
34445 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34449 args[i].op = op;
34450 args[i].mode = mode;
34453 switch (nargs)
34455 case 1:
34456 pat = GEN_FCN (icode) (target, args[0].op);
34457 break;
34458 case 2:
34459 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34460 break;
34461 case 3:
34462 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34463 args[2].op);
34464 break;
34465 case 4:
34466 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34467 args[2].op, args[3].op);
34468 break;
34469 case 5:
34470 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34471 args[2].op, args[3].op, args[4].op);
break;
34472 case 6:
34473 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34474 args[2].op, args[3].op, args[4].op,
34475 args[5].op);
34476 break;
34477 default:
34478 gcc_unreachable ();
34481 if (!pat)
34482 return 0;
34484 if (redundant_embed_rnd)
34485 pat = ix86_erase_embedded_rounding (pat);
34487 emit_insn (pat);
34488 return target;
34491 /* Subroutine of ix86_expand_builtin to take care of special insns
34492 with variable number of operands. */
34494 static rtx
34495 ix86_expand_special_args_builtin (const struct builtin_description *d,
34496 tree exp, rtx target)
34498 tree arg;
34499 rtx pat, op;
34500 unsigned int i, nargs, arg_adjust, memory;
34501 bool aligned_mem = false;
34502 struct
34504 rtx op;
34505 enum machine_mode mode;
34506 } args[3];
34507 enum insn_code icode = d->icode;
34508 bool last_arg_constant = false;
34509 const struct insn_data_d *insn_p = &insn_data[icode];
34510 enum machine_mode tmode = insn_p->operand[0].mode;
34511 enum { load, store } klass;
34513 switch ((enum ix86_builtin_func_type) d->flag)
34515 case VOID_FTYPE_VOID:
34516 emit_insn (GEN_FCN (icode) (target));
34517 return 0;
34518 case VOID_FTYPE_UINT64:
34519 case VOID_FTYPE_UNSIGNED:
34520 nargs = 0;
34521 klass = store;
34522 memory = 0;
34523 break;
34525 case INT_FTYPE_VOID:
34526 case UINT64_FTYPE_VOID:
34527 case UNSIGNED_FTYPE_VOID:
34528 nargs = 0;
34529 klass = load;
34530 memory = 0;
34531 break;
34532 case UINT64_FTYPE_PUNSIGNED:
34533 case V2DI_FTYPE_PV2DI:
34534 case V4DI_FTYPE_PV4DI:
34535 case V32QI_FTYPE_PCCHAR:
34536 case V16QI_FTYPE_PCCHAR:
34537 case V8SF_FTYPE_PCV4SF:
34538 case V8SF_FTYPE_PCFLOAT:
34539 case V4SF_FTYPE_PCFLOAT:
34540 case V4DF_FTYPE_PCV2DF:
34541 case V4DF_FTYPE_PCDOUBLE:
34542 case V2DF_FTYPE_PCDOUBLE:
34543 case VOID_FTYPE_PVOID:
34544 case V16SI_FTYPE_PV4SI:
34545 case V16SF_FTYPE_PV4SF:
34546 case V8DI_FTYPE_PV4DI:
34547 case V8DI_FTYPE_PV8DI:
34548 case V8DF_FTYPE_PV4DF:
34549 nargs = 1;
34550 klass = load;
34551 memory = 0;
34552 switch (icode)
34554 case CODE_FOR_sse4_1_movntdqa:
34555 case CODE_FOR_avx2_movntdqa:
34556 case CODE_FOR_avx512f_movntdqa:
34557 aligned_mem = true;
34558 break;
34559 default:
34560 break;
34562 break;
34563 case VOID_FTYPE_PV2SF_V4SF:
34564 case VOID_FTYPE_PV8DI_V8DI:
34565 case VOID_FTYPE_PV4DI_V4DI:
34566 case VOID_FTYPE_PV2DI_V2DI:
34567 case VOID_FTYPE_PCHAR_V32QI:
34568 case VOID_FTYPE_PCHAR_V16QI:
34569 case VOID_FTYPE_PFLOAT_V16SF:
34570 case VOID_FTYPE_PFLOAT_V8SF:
34571 case VOID_FTYPE_PFLOAT_V4SF:
34572 case VOID_FTYPE_PDOUBLE_V8DF:
34573 case VOID_FTYPE_PDOUBLE_V4DF:
34574 case VOID_FTYPE_PDOUBLE_V2DF:
34575 case VOID_FTYPE_PLONGLONG_LONGLONG:
34576 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34577 case VOID_FTYPE_PINT_INT:
34578 nargs = 1;
34579 klass = store;
34580 /* Reserve memory operand for target. */
34581 memory = ARRAY_SIZE (args);
34582 switch (icode)
34584 /* These builtins and instructions require the memory
34585 to be properly aligned. */
34586 case CODE_FOR_avx_movntv4di:
34587 case CODE_FOR_sse2_movntv2di:
34588 case CODE_FOR_avx_movntv8sf:
34589 case CODE_FOR_sse_movntv4sf:
34590 case CODE_FOR_sse4a_vmmovntv4sf:
34591 case CODE_FOR_avx_movntv4df:
34592 case CODE_FOR_sse2_movntv2df:
34593 case CODE_FOR_sse4a_vmmovntv2df:
34594 case CODE_FOR_sse2_movntidi:
34595 case CODE_FOR_sse_movntq:
34596 case CODE_FOR_sse2_movntisi:
34597 case CODE_FOR_avx512f_movntv16sf:
34598 case CODE_FOR_avx512f_movntv8df:
34599 case CODE_FOR_avx512f_movntv8di:
34600 aligned_mem = true;
34601 break;
34602 default:
34603 break;
34605 break;
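/* As a concrete case (assuming the usual emmintrin.h mapping),
   _mm_stream_si128 reaches this point via __builtin_ia32_movntdq and
   CODE_FOR_sse2_movntv2di, so its destination MEM is marked with the
   full 16-byte V2DImode alignment that movntdq requires.  */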
34606 case V4SF_FTYPE_V4SF_PCV2SF:
34607 case V2DF_FTYPE_V2DF_PCDOUBLE:
34608 nargs = 2;
34609 klass = load;
34610 memory = 1;
34611 break;
34612 case V8SF_FTYPE_PCV8SF_V8SI:
34613 case V4DF_FTYPE_PCV4DF_V4DI:
34614 case V4SF_FTYPE_PCV4SF_V4SI:
34615 case V2DF_FTYPE_PCV2DF_V2DI:
34616 case V8SI_FTYPE_PCV8SI_V8SI:
34617 case V4DI_FTYPE_PCV4DI_V4DI:
34618 case V4SI_FTYPE_PCV4SI_V4SI:
34619 case V2DI_FTYPE_PCV2DI_V2DI:
34620 nargs = 2;
34621 klass = load;
34622 memory = 0;
34623 break;
34624 case VOID_FTYPE_PV8DF_V8DF_QI:
34625 case VOID_FTYPE_PV16SF_V16SF_HI:
34626 case VOID_FTYPE_PV8DI_V8DI_QI:
34627 case VOID_FTYPE_PV16SI_V16SI_HI:
34628 switch (icode)
34630 /* These builtins and instructions require the memory
34631 to be properly aligned. */
34632 case CODE_FOR_avx512f_storev16sf_mask:
34633 case CODE_FOR_avx512f_storev16si_mask:
34634 case CODE_FOR_avx512f_storev8df_mask:
34635 case CODE_FOR_avx512f_storev8di_mask:
34636 aligned_mem = true;
34637 break;
34638 default:
34639 break;
34641 /* FALLTHRU */
34642 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34643 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34644 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34645 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34646 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34647 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34648 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34649 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34650 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34651 case VOID_FTYPE_PFLOAT_V4SF_QI:
34652 case VOID_FTYPE_PV8SI_V8DI_QI:
34653 case VOID_FTYPE_PV8HI_V8DI_QI:
34654 case VOID_FTYPE_PV16HI_V16SI_HI:
34655 case VOID_FTYPE_PV16QI_V8DI_QI:
34656 case VOID_FTYPE_PV16QI_V16SI_HI:
34657 nargs = 2;
34658 klass = store;
34659 /* Reserve memory operand for target. */
34660 memory = ARRAY_SIZE (args);
34661 break;
34662 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34663 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34664 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34665 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34666 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34667 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34668 nargs = 3;
34669 klass = load;
34670 memory = 0;
34671 switch (icode)
34673 /* These builtins and instructions require the memory
34674 to be properly aligned. */
34675 case CODE_FOR_avx512f_loadv16sf_mask:
34676 case CODE_FOR_avx512f_loadv16si_mask:
34677 case CODE_FOR_avx512f_loadv8df_mask:
34678 case CODE_FOR_avx512f_loadv8di_mask:
34679 aligned_mem = true;
34680 break;
34681 default:
34682 break;
34684 break;
34685 case VOID_FTYPE_UINT_UINT_UINT:
34686 case VOID_FTYPE_UINT64_UINT_UINT:
34687 case UCHAR_FTYPE_UINT_UINT_UINT:
34688 case UCHAR_FTYPE_UINT64_UINT_UINT:
34689 nargs = 3;
34690 klass = load;
34691 memory = ARRAY_SIZE (args);
34692 last_arg_constant = true;
34693 break;
34694 default:
34695 gcc_unreachable ();
34698 gcc_assert (nargs <= ARRAY_SIZE (args));
34700 if (klass == store)
34702 arg = CALL_EXPR_ARG (exp, 0);
34703 op = expand_normal (arg);
34704 gcc_assert (target == 0);
34705 if (memory)
34707 op = ix86_zero_extend_to_Pmode (op);
34708 target = gen_rtx_MEM (tmode, op);
34709 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34710 on it. Try to improve it using get_pointer_alignment,
34711 and if the special builtin is one that requires strict
34712 mode alignment, also from its GET_MODE_ALIGNMENT.
34713 Failure to do so could lead to ix86_legitimate_combined_insn
34714 rejecting all changes to such insns. */
34715 unsigned int align = get_pointer_alignment (arg);
34716 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34717 align = GET_MODE_ALIGNMENT (tmode);
34718 if (MEM_ALIGN (target) < align)
34719 set_mem_align (target, align);
34721 else
34722 target = force_reg (tmode, op);
34723 arg_adjust = 1;
34725 else
34727 arg_adjust = 0;
34728 if (optimize
34729 || target == 0
34730 || !register_operand (target, tmode)
34731 || GET_MODE (target) != tmode)
34732 target = gen_reg_rtx (tmode);
34735 for (i = 0; i < nargs; i++)
34737 enum machine_mode mode = insn_p->operand[i + 1].mode;
34738 bool match;
34740 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34741 op = expand_normal (arg);
34742 match = insn_p->operand[i + 1].predicate (op, mode);
34744 if (last_arg_constant && (i + 1) == nargs)
34746 if (!match)
34748 if (icode == CODE_FOR_lwp_lwpvalsi3
34749 || icode == CODE_FOR_lwp_lwpinssi3
34750 || icode == CODE_FOR_lwp_lwpvaldi3
34751 || icode == CODE_FOR_lwp_lwpinsdi3)
34752 error ("the last argument must be a 32-bit immediate");
34753 else
34754 error ("the last argument must be an 8-bit immediate");
34755 return const0_rtx;
34758 else
34760 if (i == memory)
34762 /* This must be the memory operand. */
34763 op = ix86_zero_extend_to_Pmode (op);
34764 op = gen_rtx_MEM (mode, op);
34765 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34766 on it. Try to improve it using get_pointer_alignment,
34767 and if the special builtin is one that requires strict
34768 mode alignment, also from its GET_MODE_ALIGNMENT.
34769 Failure to do so could lead to ix86_legitimate_combined_insn
34770 rejecting all changes to such insns. */
34771 unsigned int align = get_pointer_alignment (arg);
34772 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34773 align = GET_MODE_ALIGNMENT (mode);
34774 if (MEM_ALIGN (op) < align)
34775 set_mem_align (op, align);
34777 else
34779 /* This must be a register. */
34780 if (VECTOR_MODE_P (mode))
34781 op = safe_vector_operand (op, mode);
34783 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34784 op = copy_to_mode_reg (mode, op);
34785 else
34787 op = copy_to_reg (op);
34788 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34793 args[i].op = op;
34794 args[i].mode = mode;
34797 switch (nargs)
34799 case 0:
34800 pat = GEN_FCN (icode) (target);
34801 break;
34802 case 1:
34803 pat = GEN_FCN (icode) (target, args[0].op);
34804 break;
34805 case 2:
34806 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34807 break;
34808 case 3:
34809 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34810 break;
34811 default:
34812 gcc_unreachable ();
34815 if (! pat)
34816 return 0;
34817 emit_insn (pat);
34818 return klass == store ? 0 : target;
34821 /* Return the integer constant in ARG. Constrain it to be in the range
34822 of the subparts of VEC_TYPE; issue an error if not. */
34824 static int
34825 get_element_number (tree vec_type, tree arg)
34827 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34829 if (!tree_fits_uhwi_p (arg)
34830 || (elt = tree_to_uhwi (arg), elt > max))
34832 error ("selector must be an integer constant in the range 0..%wi", max);
34833 return 0;
34836 return elt;
34839 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34840 ix86_expand_vector_init. We DO have language-level syntax for this, in
34841 the form of (type){ init-list }. Except that since we can't place emms
34842 instructions from inside the compiler, we can't allow the use of MMX
34843 registers unless the user explicitly asks for it. So we do *not* define
34844 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34845 we have builtins invoked by mmintrin.h that give us license to emit
34846 these sorts of instructions. */
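/* For example (assuming the mmintrin.h wrappers keep their usual form),
   a user-level call such as
     __m64 v = _mm_set_pi32 (hi, lo);
   arrives here as a call to __builtin_ia32_vec_init_v2si and is expanded
   by ix86_expand_vec_init_builtin below.  */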
34848 static rtx
34849 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34851 enum machine_mode tmode = TYPE_MODE (type);
34852 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34853 int i, n_elt = GET_MODE_NUNITS (tmode);
34854 rtvec v = rtvec_alloc (n_elt);
34856 gcc_assert (VECTOR_MODE_P (tmode));
34857 gcc_assert (call_expr_nargs (exp) == n_elt);
34859 for (i = 0; i < n_elt; ++i)
34861 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34862 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34865 if (!target || !register_operand (target, tmode))
34866 target = gen_reg_rtx (tmode);
34868 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34869 return target;
34872 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34873 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34874 had a language-level syntax for referencing vector elements. */
34876 static rtx
34877 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34879 enum machine_mode tmode, mode0;
34880 tree arg0, arg1;
34881 int elt;
34882 rtx op0;
34884 arg0 = CALL_EXPR_ARG (exp, 0);
34885 arg1 = CALL_EXPR_ARG (exp, 1);
34887 op0 = expand_normal (arg0);
34888 elt = get_element_number (TREE_TYPE (arg0), arg1);
34890 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34891 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34892 gcc_assert (VECTOR_MODE_P (mode0));
34894 op0 = force_reg (mode0, op0);
34896 if (optimize || !target || !register_operand (target, tmode))
34897 target = gen_reg_rtx (tmode);
34899 ix86_expand_vector_extract (true, target, op0, elt);
34901 return target;
34904 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34905 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34906 a language-level syntax for referencing vector elements. */
34908 static rtx
34909 ix86_expand_vec_set_builtin (tree exp)
34911 enum machine_mode tmode, mode1;
34912 tree arg0, arg1, arg2;
34913 int elt;
34914 rtx op0, op1, target;
34916 arg0 = CALL_EXPR_ARG (exp, 0);
34917 arg1 = CALL_EXPR_ARG (exp, 1);
34918 arg2 = CALL_EXPR_ARG (exp, 2);
34920 tmode = TYPE_MODE (TREE_TYPE (arg0));
34921 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34922 gcc_assert (VECTOR_MODE_P (tmode));
34924 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
34925 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
34926 elt = get_element_number (TREE_TYPE (arg0), arg2);
34928 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
34929 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
34931 op0 = force_reg (tmode, op0);
34932 op1 = force_reg (mode1, op1);
34934 /* OP0 is the source of these builtin functions and shouldn't be
34935 modified. Create a copy, use it and return it as target. */
34936 target = gen_reg_rtx (tmode);
34937 emit_move_insn (target, op0);
34938 ix86_expand_vector_set (true, target, op1, elt);
34940 return target;
34943 /* Expand an expression EXP that calls a built-in function,
34944 with result going to TARGET if that's convenient
34945 (and in mode MODE if that's convenient).
34946 SUBTARGET may be used as the target for computing one of EXP's operands.
34947 IGNORE is nonzero if the value is to be ignored. */
34949 static rtx
34950 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
34951 enum machine_mode mode, int ignore)
34953 const struct builtin_description *d;
34954 size_t i;
34955 enum insn_code icode;
34956 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
34957 tree arg0, arg1, arg2, arg3, arg4;
34958 rtx op0, op1, op2, op3, op4, pat, insn;
34959 enum machine_mode mode0, mode1, mode2, mode3, mode4;
34960 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
34962 /* For CPU builtins that can be folded, fold first and expand the fold. */
34963 switch (fcode)
34965 case IX86_BUILTIN_CPU_INIT:
34967 /* Make it call __cpu_indicator_init in libgcc. */
34968 tree call_expr, fndecl, type;
34969 type = build_function_type_list (integer_type_node, NULL_TREE);
34970 fndecl = build_fn_decl ("__cpu_indicator_init", type);
34971 call_expr = build_call_expr (fndecl, 0);
34972 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
34974 case IX86_BUILTIN_CPU_IS:
34975 case IX86_BUILTIN_CPU_SUPPORTS:
34977 tree arg0 = CALL_EXPR_ARG (exp, 0);
34978 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
34979 gcc_assert (fold_expr != NULL_TREE);
34980 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
34984 /* Determine whether the builtin function is available under the current ISA.
34985 Originally the builtin was not created if it wasn't applicable to the
34986 current ISA based on the command line switches. With function specific
34987 options, we need to check in the context of the function making the call
34988 whether it is supported. */
34989 if (ix86_builtins_isa[fcode].isa
34990 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
34992 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
34993 NULL, (enum fpmath_unit) 0, false);
34995 if (!opts)
34996 error ("%qE needs unknown isa option", fndecl);
34997 else
34999 gcc_assert (opts != NULL);
35000 error ("%qE needs isa option %s", fndecl, opts);
35001 free (opts);
35003 return const0_rtx;
35006 switch (fcode)
35008 case IX86_BUILTIN_MASKMOVQ:
35009 case IX86_BUILTIN_MASKMOVDQU:
35010 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35011 ? CODE_FOR_mmx_maskmovq
35012 : CODE_FOR_sse2_maskmovdqu);
35013 /* Note the arg order is different from the operand order. */
35014 arg1 = CALL_EXPR_ARG (exp, 0);
35015 arg2 = CALL_EXPR_ARG (exp, 1);
35016 arg0 = CALL_EXPR_ARG (exp, 2);
35017 op0 = expand_normal (arg0);
35018 op1 = expand_normal (arg1);
35019 op2 = expand_normal (arg2);
35020 mode0 = insn_data[icode].operand[0].mode;
35021 mode1 = insn_data[icode].operand[1].mode;
35022 mode2 = insn_data[icode].operand[2].mode;
35024 op0 = ix86_zero_extend_to_Pmode (op0);
35025 op0 = gen_rtx_MEM (mode1, op0);
35027 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35028 op0 = copy_to_mode_reg (mode0, op0);
35029 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35030 op1 = copy_to_mode_reg (mode1, op1);
35031 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35032 op2 = copy_to_mode_reg (mode2, op2);
35033 pat = GEN_FCN (icode) (op0, op1, op2);
35034 if (! pat)
35035 return 0;
35036 emit_insn (pat);
35037 return 0;
35039 case IX86_BUILTIN_LDMXCSR:
35040 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35041 target = assign_386_stack_local (SImode, SLOT_TEMP);
35042 emit_move_insn (target, op0);
35043 emit_insn (gen_sse_ldmxcsr (target));
35044 return 0;
35046 case IX86_BUILTIN_STMXCSR:
35047 target = assign_386_stack_local (SImode, SLOT_TEMP);
35048 emit_insn (gen_sse_stmxcsr (target));
35049 return copy_to_mode_reg (SImode, target);
35051 case IX86_BUILTIN_CLFLUSH:
35052 arg0 = CALL_EXPR_ARG (exp, 0);
35053 op0 = expand_normal (arg0);
35054 icode = CODE_FOR_sse2_clflush;
35055 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35056 op0 = ix86_zero_extend_to_Pmode (op0);
35058 emit_insn (gen_sse2_clflush (op0));
35059 return 0;
35061 case IX86_BUILTIN_MONITOR:
35062 arg0 = CALL_EXPR_ARG (exp, 0);
35063 arg1 = CALL_EXPR_ARG (exp, 1);
35064 arg2 = CALL_EXPR_ARG (exp, 2);
35065 op0 = expand_normal (arg0);
35066 op1 = expand_normal (arg1);
35067 op2 = expand_normal (arg2);
35068 if (!REG_P (op0))
35069 op0 = ix86_zero_extend_to_Pmode (op0);
35070 if (!REG_P (op1))
35071 op1 = copy_to_mode_reg (SImode, op1);
35072 if (!REG_P (op2))
35073 op2 = copy_to_mode_reg (SImode, op2);
35074 emit_insn (ix86_gen_monitor (op0, op1, op2));
35075 return 0;
35077 case IX86_BUILTIN_MWAIT:
35078 arg0 = CALL_EXPR_ARG (exp, 0);
35079 arg1 = CALL_EXPR_ARG (exp, 1);
35080 op0 = expand_normal (arg0);
35081 op1 = expand_normal (arg1);
35082 if (!REG_P (op0))
35083 op0 = copy_to_mode_reg (SImode, op0);
35084 if (!REG_P (op1))
35085 op1 = copy_to_mode_reg (SImode, op1);
35086 emit_insn (gen_sse3_mwait (op0, op1));
35087 return 0;
35089 case IX86_BUILTIN_VEC_INIT_V2SI:
35090 case IX86_BUILTIN_VEC_INIT_V4HI:
35091 case IX86_BUILTIN_VEC_INIT_V8QI:
35092 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35094 case IX86_BUILTIN_VEC_EXT_V2DF:
35095 case IX86_BUILTIN_VEC_EXT_V2DI:
35096 case IX86_BUILTIN_VEC_EXT_V4SF:
35097 case IX86_BUILTIN_VEC_EXT_V4SI:
35098 case IX86_BUILTIN_VEC_EXT_V8HI:
35099 case IX86_BUILTIN_VEC_EXT_V2SI:
35100 case IX86_BUILTIN_VEC_EXT_V4HI:
35101 case IX86_BUILTIN_VEC_EXT_V16QI:
35102 return ix86_expand_vec_ext_builtin (exp, target);
35104 case IX86_BUILTIN_VEC_SET_V2DI:
35105 case IX86_BUILTIN_VEC_SET_V4SF:
35106 case IX86_BUILTIN_VEC_SET_V4SI:
35107 case IX86_BUILTIN_VEC_SET_V8HI:
35108 case IX86_BUILTIN_VEC_SET_V4HI:
35109 case IX86_BUILTIN_VEC_SET_V16QI:
35110 return ix86_expand_vec_set_builtin (exp);
35112 case IX86_BUILTIN_INFQ:
35113 case IX86_BUILTIN_HUGE_VALQ:
35115 REAL_VALUE_TYPE inf;
35116 rtx tmp;
35118 real_inf (&inf);
35119 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35121 tmp = validize_mem (force_const_mem (mode, tmp));
35123 if (target == 0)
35124 target = gen_reg_rtx (mode);
35126 emit_move_insn (target, tmp);
35127 return target;
35130 case IX86_BUILTIN_RDPMC:
35131 case IX86_BUILTIN_RDTSC:
35132 case IX86_BUILTIN_RDTSCP:
35134 op0 = gen_reg_rtx (DImode);
35135 op1 = gen_reg_rtx (DImode);
35137 if (fcode == IX86_BUILTIN_RDPMC)
35139 arg0 = CALL_EXPR_ARG (exp, 0);
35140 op2 = expand_normal (arg0);
35141 if (!register_operand (op2, SImode))
35142 op2 = copy_to_mode_reg (SImode, op2);
35144 insn = (TARGET_64BIT
35145 ? gen_rdpmc_rex64 (op0, op1, op2)
35146 : gen_rdpmc (op0, op2));
35147 emit_insn (insn);
35149 else if (fcode == IX86_BUILTIN_RDTSC)
35151 insn = (TARGET_64BIT
35152 ? gen_rdtsc_rex64 (op0, op1)
35153 : gen_rdtsc (op0));
35154 emit_insn (insn);
35156 else
35158 op2 = gen_reg_rtx (SImode);
35160 insn = (TARGET_64BIT
35161 ? gen_rdtscp_rex64 (op0, op1, op2)
35162 : gen_rdtscp (op0, op2));
35163 emit_insn (insn);
35165 arg0 = CALL_EXPR_ARG (exp, 0);
35166 op4 = expand_normal (arg0);
35167 if (!address_operand (op4, VOIDmode))
35169 op4 = convert_memory_address (Pmode, op4);
35170 op4 = copy_addr_to_reg (op4);
35172 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35175 if (target == 0)
35177 /* mode is VOIDmode if __builtin_rd* has been called
35178 without lhs. */
35179 if (mode == VOIDmode)
35180 return target;
35181 target = gen_reg_rtx (mode);
35184 if (TARGET_64BIT)
35186 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35187 op1, 1, OPTAB_DIRECT);
35188 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35189 op0, 1, OPTAB_DIRECT);
35192 emit_move_insn (target, op0);
35193 return target;
35195 case IX86_BUILTIN_FXSAVE:
35196 case IX86_BUILTIN_FXRSTOR:
35197 case IX86_BUILTIN_FXSAVE64:
35198 case IX86_BUILTIN_FXRSTOR64:
35199 case IX86_BUILTIN_FNSTENV:
35200 case IX86_BUILTIN_FLDENV:
35201 case IX86_BUILTIN_FNSTSW:
35202 mode0 = BLKmode;
35203 switch (fcode)
35205 case IX86_BUILTIN_FXSAVE:
35206 icode = CODE_FOR_fxsave;
35207 break;
35208 case IX86_BUILTIN_FXRSTOR:
35209 icode = CODE_FOR_fxrstor;
35210 break;
35211 case IX86_BUILTIN_FXSAVE64:
35212 icode = CODE_FOR_fxsave64;
35213 break;
35214 case IX86_BUILTIN_FXRSTOR64:
35215 icode = CODE_FOR_fxrstor64;
35216 break;
35217 case IX86_BUILTIN_FNSTENV:
35218 icode = CODE_FOR_fnstenv;
35219 break;
35220 case IX86_BUILTIN_FLDENV:
35221 icode = CODE_FOR_fldenv;
35222 break;
35223 case IX86_BUILTIN_FNSTSW:
35224 icode = CODE_FOR_fnstsw;
35225 mode0 = HImode;
35226 break;
35227 default:
35228 gcc_unreachable ();
35231 arg0 = CALL_EXPR_ARG (exp, 0);
35232 op0 = expand_normal (arg0);
35234 if (!address_operand (op0, VOIDmode))
35236 op0 = convert_memory_address (Pmode, op0);
35237 op0 = copy_addr_to_reg (op0);
35239 op0 = gen_rtx_MEM (mode0, op0);
35241 pat = GEN_FCN (icode) (op0);
35242 if (pat)
35243 emit_insn (pat);
35244 return 0;
35246 case IX86_BUILTIN_XSAVE:
35247 case IX86_BUILTIN_XRSTOR:
35248 case IX86_BUILTIN_XSAVE64:
35249 case IX86_BUILTIN_XRSTOR64:
35250 case IX86_BUILTIN_XSAVEOPT:
35251 case IX86_BUILTIN_XSAVEOPT64:
35252 arg0 = CALL_EXPR_ARG (exp, 0);
35253 arg1 = CALL_EXPR_ARG (exp, 1);
35254 op0 = expand_normal (arg0);
35255 op1 = expand_normal (arg1);
35257 if (!address_operand (op0, VOIDmode))
35259 op0 = convert_memory_address (Pmode, op0);
35260 op0 = copy_addr_to_reg (op0);
35262 op0 = gen_rtx_MEM (BLKmode, op0);
35264 op1 = force_reg (DImode, op1);
35266 if (TARGET_64BIT)
35268 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35269 NULL, 1, OPTAB_DIRECT);
35270 switch (fcode)
35272 case IX86_BUILTIN_XSAVE:
35273 icode = CODE_FOR_xsave_rex64;
35274 break;
35275 case IX86_BUILTIN_XRSTOR:
35276 icode = CODE_FOR_xrstor_rex64;
35277 break;
35278 case IX86_BUILTIN_XSAVE64:
35279 icode = CODE_FOR_xsave64;
35280 break;
35281 case IX86_BUILTIN_XRSTOR64:
35282 icode = CODE_FOR_xrstor64;
35283 break;
35284 case IX86_BUILTIN_XSAVEOPT:
35285 icode = CODE_FOR_xsaveopt_rex64;
35286 break;
35287 case IX86_BUILTIN_XSAVEOPT64:
35288 icode = CODE_FOR_xsaveopt64;
35289 break;
35290 default:
35291 gcc_unreachable ();
35294 op2 = gen_lowpart (SImode, op2);
35295 op1 = gen_lowpart (SImode, op1);
35296 pat = GEN_FCN (icode) (op0, op1, op2);
35298 else
35300 switch (fcode)
35302 case IX86_BUILTIN_XSAVE:
35303 icode = CODE_FOR_xsave;
35304 break;
35305 case IX86_BUILTIN_XRSTOR:
35306 icode = CODE_FOR_xrstor;
35307 break;
35308 case IX86_BUILTIN_XSAVEOPT:
35309 icode = CODE_FOR_xsaveopt;
35310 break;
35311 default:
35312 gcc_unreachable ();
35314 pat = GEN_FCN (icode) (op0, op1);
35317 if (pat)
35318 emit_insn (pat);
35319 return 0;
35321 case IX86_BUILTIN_LLWPCB:
35322 arg0 = CALL_EXPR_ARG (exp, 0);
35323 op0 = expand_normal (arg0);
35324 icode = CODE_FOR_lwp_llwpcb;
35325 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35326 op0 = ix86_zero_extend_to_Pmode (op0);
35327 emit_insn (gen_lwp_llwpcb (op0));
35328 return 0;
35330 case IX86_BUILTIN_SLWPCB:
35331 icode = CODE_FOR_lwp_slwpcb;
35332 if (!target
35333 || !insn_data[icode].operand[0].predicate (target, Pmode))
35334 target = gen_reg_rtx (Pmode);
35335 emit_insn (gen_lwp_slwpcb (target));
35336 return target;
35338 case IX86_BUILTIN_BEXTRI32:
35339 case IX86_BUILTIN_BEXTRI64:
35340 arg0 = CALL_EXPR_ARG (exp, 0);
35341 arg1 = CALL_EXPR_ARG (exp, 1);
35342 op0 = expand_normal (arg0);
35343 op1 = expand_normal (arg1);
35344 icode = (fcode == IX86_BUILTIN_BEXTRI32
35345 ? CODE_FOR_tbm_bextri_si
35346 : CODE_FOR_tbm_bextri_di);
35347 if (!CONST_INT_P (op1))
35349 error ("last argument must be an immediate");
35350 return const0_rtx;
35352 else
35354 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35355 unsigned char lsb_index = INTVAL (op1) & 0xFF;
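/* E.g. (intrinsic name assumed from tbmintrin.h): __bextri_u32 (x, 0x0408)
   encodes length 0x04 and start bit 0x08, i.e. it asks for a 4-bit field
   beginning at bit 8 of x.  */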
35356 op1 = GEN_INT (length);
35357 op2 = GEN_INT (lsb_index);
35358 pat = GEN_FCN (icode) (target, op0, op1, op2);
35359 if (pat)
35360 emit_insn (pat);
35361 return target;
35364 case IX86_BUILTIN_RDRAND16_STEP:
35365 icode = CODE_FOR_rdrandhi_1;
35366 mode0 = HImode;
35367 goto rdrand_step;
35369 case IX86_BUILTIN_RDRAND32_STEP:
35370 icode = CODE_FOR_rdrandsi_1;
35371 mode0 = SImode;
35372 goto rdrand_step;
35374 case IX86_BUILTIN_RDRAND64_STEP:
35375 icode = CODE_FOR_rdranddi_1;
35376 mode0 = DImode;
35378 rdrand_step:
35379 op0 = gen_reg_rtx (mode0);
35380 emit_insn (GEN_FCN (icode) (op0));
35382 arg0 = CALL_EXPR_ARG (exp, 0);
35383 op1 = expand_normal (arg0);
35384 if (!address_operand (op1, VOIDmode))
35386 op1 = convert_memory_address (Pmode, op1);
35387 op1 = copy_addr_to_reg (op1);
35389 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35391 op1 = gen_reg_rtx (SImode);
35392 emit_move_insn (op1, CONST1_RTX (SImode));
35394 /* Emit SImode conditional move. */
35395 if (mode0 == HImode)
35397 op2 = gen_reg_rtx (SImode);
35398 emit_insn (gen_zero_extendhisi2 (op2, op0));
35400 else if (mode0 == SImode)
35401 op2 = op0;
35402 else
35403 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35405 if (target == 0)
35406 target = gen_reg_rtx (SImode);
35408 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35409 const0_rtx);
35410 emit_insn (gen_rtx_SET (VOIDmode, target,
35411 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35412 return target;
35414 case IX86_BUILTIN_RDSEED16_STEP:
35415 icode = CODE_FOR_rdseedhi_1;
35416 mode0 = HImode;
35417 goto rdseed_step;
35419 case IX86_BUILTIN_RDSEED32_STEP:
35420 icode = CODE_FOR_rdseedsi_1;
35421 mode0 = SImode;
35422 goto rdseed_step;
35424 case IX86_BUILTIN_RDSEED64_STEP:
35425 icode = CODE_FOR_rdseeddi_1;
35426 mode0 = DImode;
35428 rdseed_step:
35429 op0 = gen_reg_rtx (mode0);
35430 emit_insn (GEN_FCN (icode) (op0));
35432 arg0 = CALL_EXPR_ARG (exp, 0);
35433 op1 = expand_normal (arg0);
35434 if (!address_operand (op1, VOIDmode))
35436 op1 = convert_memory_address (Pmode, op1);
35437 op1 = copy_addr_to_reg (op1);
35439 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35441 op2 = gen_reg_rtx (QImode);
35443 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35444 const0_rtx);
35445 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35447 if (target == 0)
35448 target = gen_reg_rtx (SImode);
35450 emit_insn (gen_zero_extendqisi2 (target, op2));
35451 return target;
35453 case IX86_BUILTIN_ADDCARRYX32:
35454 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35455 mode0 = SImode;
35456 goto addcarryx;
35458 case IX86_BUILTIN_ADDCARRYX64:
35459 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35460 mode0 = DImode;
35462 addcarryx:
35463 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35464 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35465 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35466 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
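/* For reference (assuming the adxintrin.h wrapper keeps its usual shape),
   the user-level form is
     unsigned char _addcarryx_u32 (unsigned char c_in, unsigned int x,
				   unsigned int y, unsigned int *sum_out);
   the code below regenerates CF from c_in, adds x + y + CF, stores the
   sum through sum_out and returns the resulting carry.  */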
35468 op0 = gen_reg_rtx (QImode);
35470 /* Generate CF from input operand. */
35471 op1 = expand_normal (arg0);
35472 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35473 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35475 /* Gen ADCX instruction to compute X+Y+CF. */
35476 op2 = expand_normal (arg1);
35477 op3 = expand_normal (arg2);
35479 if (!REG_P (op2))
35480 op2 = copy_to_mode_reg (mode0, op2);
35481 if (!REG_P (op3))
35482 op3 = copy_to_mode_reg (mode0, op3);
35484 op0 = gen_reg_rtx (mode0);
35486 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35487 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35488 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35490 /* Store the result. */
35491 op4 = expand_normal (arg3);
35492 if (!address_operand (op4, VOIDmode))
35494 op4 = convert_memory_address (Pmode, op4);
35495 op4 = copy_addr_to_reg (op4);
35497 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35499 /* Return current CF value. */
35500 if (target == 0)
35501 target = gen_reg_rtx (QImode);
35503 PUT_MODE (pat, QImode);
35504 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35505 return target;
35507 case IX86_BUILTIN_READ_FLAGS:
35508 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35510 if (optimize
35511 || target == NULL_RTX
35512 || !nonimmediate_operand (target, word_mode)
35513 || GET_MODE (target) != word_mode)
35514 target = gen_reg_rtx (word_mode);
35516 emit_insn (gen_pop (target));
35517 return target;
35519 case IX86_BUILTIN_WRITE_FLAGS:
35521 arg0 = CALL_EXPR_ARG (exp, 0);
35522 op0 = expand_normal (arg0);
35523 if (!general_no_elim_operand (op0, word_mode))
35524 op0 = copy_to_mode_reg (word_mode, op0);
35526 emit_insn (gen_push (op0));
35527 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35528 return 0;
35530 case IX86_BUILTIN_KORTESTC16:
35531 icode = CODE_FOR_kortestchi;
35532 mode0 = HImode;
35533 mode1 = CCCmode;
35534 goto kortest;
35536 case IX86_BUILTIN_KORTESTZ16:
35537 icode = CODE_FOR_kortestzhi;
35538 mode0 = HImode;
35539 mode1 = CCZmode;
35541 kortest:
35542 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35543 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35544 op0 = expand_normal (arg0);
35545 op1 = expand_normal (arg1);
35547 op0 = copy_to_reg (op0);
35548 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35549 op1 = copy_to_reg (op1);
35550 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35552 target = gen_reg_rtx (QImode);
35553 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35555 /* Emit kortest. */
35556 emit_insn (GEN_FCN (icode) (op0, op1));
35557 /* And use setcc to return result from flags. */
35558 ix86_expand_setcc (target, EQ,
35559 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35560 return target;
35562 case IX86_BUILTIN_GATHERSIV2DF:
35563 icode = CODE_FOR_avx2_gathersiv2df;
35564 goto gather_gen;
35565 case IX86_BUILTIN_GATHERSIV4DF:
35566 icode = CODE_FOR_avx2_gathersiv4df;
35567 goto gather_gen;
35568 case IX86_BUILTIN_GATHERDIV2DF:
35569 icode = CODE_FOR_avx2_gatherdiv2df;
35570 goto gather_gen;
35571 case IX86_BUILTIN_GATHERDIV4DF:
35572 icode = CODE_FOR_avx2_gatherdiv4df;
35573 goto gather_gen;
35574 case IX86_BUILTIN_GATHERSIV4SF:
35575 icode = CODE_FOR_avx2_gathersiv4sf;
35576 goto gather_gen;
35577 case IX86_BUILTIN_GATHERSIV8SF:
35578 icode = CODE_FOR_avx2_gathersiv8sf;
35579 goto gather_gen;
35580 case IX86_BUILTIN_GATHERDIV4SF:
35581 icode = CODE_FOR_avx2_gatherdiv4sf;
35582 goto gather_gen;
35583 case IX86_BUILTIN_GATHERDIV8SF:
35584 icode = CODE_FOR_avx2_gatherdiv8sf;
35585 goto gather_gen;
35586 case IX86_BUILTIN_GATHERSIV2DI:
35587 icode = CODE_FOR_avx2_gathersiv2di;
35588 goto gather_gen;
35589 case IX86_BUILTIN_GATHERSIV4DI:
35590 icode = CODE_FOR_avx2_gathersiv4di;
35591 goto gather_gen;
35592 case IX86_BUILTIN_GATHERDIV2DI:
35593 icode = CODE_FOR_avx2_gatherdiv2di;
35594 goto gather_gen;
35595 case IX86_BUILTIN_GATHERDIV4DI:
35596 icode = CODE_FOR_avx2_gatherdiv4di;
35597 goto gather_gen;
35598 case IX86_BUILTIN_GATHERSIV4SI:
35599 icode = CODE_FOR_avx2_gathersiv4si;
35600 goto gather_gen;
35601 case IX86_BUILTIN_GATHERSIV8SI:
35602 icode = CODE_FOR_avx2_gathersiv8si;
35603 goto gather_gen;
35604 case IX86_BUILTIN_GATHERDIV4SI:
35605 icode = CODE_FOR_avx2_gatherdiv4si;
35606 goto gather_gen;
35607 case IX86_BUILTIN_GATHERDIV8SI:
35608 icode = CODE_FOR_avx2_gatherdiv8si;
35609 goto gather_gen;
35610 case IX86_BUILTIN_GATHERALTSIV4DF:
35611 icode = CODE_FOR_avx2_gathersiv4df;
35612 goto gather_gen;
35613 case IX86_BUILTIN_GATHERALTDIV8SF:
35614 icode = CODE_FOR_avx2_gatherdiv8sf;
35615 goto gather_gen;
35616 case IX86_BUILTIN_GATHERALTSIV4DI:
35617 icode = CODE_FOR_avx2_gathersiv4di;
35618 goto gather_gen;
35619 case IX86_BUILTIN_GATHERALTDIV8SI:
35620 icode = CODE_FOR_avx2_gatherdiv8si;
35621 goto gather_gen;
35622 case IX86_BUILTIN_GATHER3SIV16SF:
35623 icode = CODE_FOR_avx512f_gathersiv16sf;
35624 goto gather_gen;
35625 case IX86_BUILTIN_GATHER3SIV8DF:
35626 icode = CODE_FOR_avx512f_gathersiv8df;
35627 goto gather_gen;
35628 case IX86_BUILTIN_GATHER3DIV16SF:
35629 icode = CODE_FOR_avx512f_gatherdiv16sf;
35630 goto gather_gen;
35631 case IX86_BUILTIN_GATHER3DIV8DF:
35632 icode = CODE_FOR_avx512f_gatherdiv8df;
35633 goto gather_gen;
35634 case IX86_BUILTIN_GATHER3SIV16SI:
35635 icode = CODE_FOR_avx512f_gathersiv16si;
35636 goto gather_gen;
35637 case IX86_BUILTIN_GATHER3SIV8DI:
35638 icode = CODE_FOR_avx512f_gathersiv8di;
35639 goto gather_gen;
35640 case IX86_BUILTIN_GATHER3DIV16SI:
35641 icode = CODE_FOR_avx512f_gatherdiv16si;
35642 goto gather_gen;
35643 case IX86_BUILTIN_GATHER3DIV8DI:
35644 icode = CODE_FOR_avx512f_gatherdiv8di;
35645 goto gather_gen;
35646 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35647 icode = CODE_FOR_avx512f_gathersiv8df;
35648 goto gather_gen;
35649 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35650 icode = CODE_FOR_avx512f_gatherdiv16sf;
35651 goto gather_gen;
35652 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35653 icode = CODE_FOR_avx512f_gathersiv8di;
35654 goto gather_gen;
35655 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35656 icode = CODE_FOR_avx512f_gatherdiv16si;
35657 goto gather_gen;
35658 case IX86_BUILTIN_SCATTERSIV16SF:
35659 icode = CODE_FOR_avx512f_scattersiv16sf;
35660 goto scatter_gen;
35661 case IX86_BUILTIN_SCATTERSIV8DF:
35662 icode = CODE_FOR_avx512f_scattersiv8df;
35663 goto scatter_gen;
35664 case IX86_BUILTIN_SCATTERDIV16SF:
35665 icode = CODE_FOR_avx512f_scatterdiv16sf;
35666 goto scatter_gen;
35667 case IX86_BUILTIN_SCATTERDIV8DF:
35668 icode = CODE_FOR_avx512f_scatterdiv8df;
35669 goto scatter_gen;
35670 case IX86_BUILTIN_SCATTERSIV16SI:
35671 icode = CODE_FOR_avx512f_scattersiv16si;
35672 goto scatter_gen;
35673 case IX86_BUILTIN_SCATTERSIV8DI:
35674 icode = CODE_FOR_avx512f_scattersiv8di;
35675 goto scatter_gen;
35676 case IX86_BUILTIN_SCATTERDIV16SI:
35677 icode = CODE_FOR_avx512f_scatterdiv16si;
35678 goto scatter_gen;
35679 case IX86_BUILTIN_SCATTERDIV8DI:
35680 icode = CODE_FOR_avx512f_scatterdiv8di;
35681 goto scatter_gen;
35683 case IX86_BUILTIN_GATHERPFDPD:
35684 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35685 goto vec_prefetch_gen;
35686 case IX86_BUILTIN_GATHERPFDPS:
35687 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35688 goto vec_prefetch_gen;
35689 case IX86_BUILTIN_GATHERPFQPD:
35690 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35691 goto vec_prefetch_gen;
35692 case IX86_BUILTIN_GATHERPFQPS:
35693 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35694 goto vec_prefetch_gen;
35695 case IX86_BUILTIN_SCATTERPFDPD:
35696 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35697 goto vec_prefetch_gen;
35698 case IX86_BUILTIN_SCATTERPFDPS:
35699 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35700 goto vec_prefetch_gen;
35701 case IX86_BUILTIN_SCATTERPFQPD:
35702 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35703 goto vec_prefetch_gen;
35704 case IX86_BUILTIN_SCATTERPFQPS:
35705 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35706 goto vec_prefetch_gen;
35708 gather_gen:
35709 rtx half;
35710 rtx (*gen) (rtx, rtx);
35712 arg0 = CALL_EXPR_ARG (exp, 0);
35713 arg1 = CALL_EXPR_ARG (exp, 1);
35714 arg2 = CALL_EXPR_ARG (exp, 2);
35715 arg3 = CALL_EXPR_ARG (exp, 3);
35716 arg4 = CALL_EXPR_ARG (exp, 4);
35717 op0 = expand_normal (arg0);
35718 op1 = expand_normal (arg1);
35719 op2 = expand_normal (arg2);
35720 op3 = expand_normal (arg3);
35721 op4 = expand_normal (arg4);
35722 /* Note the arg order is different from the operand order. */
35723 mode0 = insn_data[icode].operand[1].mode;
35724 mode2 = insn_data[icode].operand[3].mode;
35725 mode3 = insn_data[icode].operand[4].mode;
35726 mode4 = insn_data[icode].operand[5].mode;
35728 if (target == NULL_RTX
35729 || GET_MODE (target) != insn_data[icode].operand[0].mode
35730 || !insn_data[icode].operand[0].predicate (target,
35731 GET_MODE (target)))
35732 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35733 else
35734 subtarget = target;
35736 switch (fcode)
35738 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35739 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35740 half = gen_reg_rtx (V8SImode);
35741 if (!nonimmediate_operand (op2, V16SImode))
35742 op2 = copy_to_mode_reg (V16SImode, op2);
35743 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35744 op2 = half;
35745 break;
35746 case IX86_BUILTIN_GATHERALTSIV4DF:
35747 case IX86_BUILTIN_GATHERALTSIV4DI:
35748 half = gen_reg_rtx (V4SImode);
35749 if (!nonimmediate_operand (op2, V8SImode))
35750 op2 = copy_to_mode_reg (V8SImode, op2);
35751 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35752 op2 = half;
35753 break;
35754 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35755 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35756 half = gen_reg_rtx (mode0);
35757 if (mode0 == V8SFmode)
35758 gen = gen_vec_extract_lo_v16sf;
35759 else
35760 gen = gen_vec_extract_lo_v16si;
35761 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35762 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35763 emit_insn (gen (half, op0));
35764 op0 = half;
35765 if (GET_MODE (op3) != VOIDmode)
35767 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35768 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35769 emit_insn (gen (half, op3));
35770 op3 = half;
35772 break;
35773 case IX86_BUILTIN_GATHERALTDIV8SF:
35774 case IX86_BUILTIN_GATHERALTDIV8SI:
35775 half = gen_reg_rtx (mode0);
35776 if (mode0 == V4SFmode)
35777 gen = gen_vec_extract_lo_v8sf;
35778 else
35779 gen = gen_vec_extract_lo_v8si;
35780 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35781 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35782 emit_insn (gen (half, op0));
35783 op0 = half;
35784 if (GET_MODE (op3) != VOIDmode)
35786 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35787 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35788 emit_insn (gen (half, op3));
35789 op3 = half;
35791 break;
35792 default:
35793 break;
35796 /* Force memory operand only with base register here. But we
35797 don't want to do it on memory operand for other builtin
35798 functions. */
35799 op1 = ix86_zero_extend_to_Pmode (op1);
35801 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35802 op0 = copy_to_mode_reg (mode0, op0);
35803 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35804 op1 = copy_to_mode_reg (Pmode, op1);
35805 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35806 op2 = copy_to_mode_reg (mode2, op2);
35807 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35809 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35810 op3 = copy_to_mode_reg (mode3, op3);
35812 else
35814 op3 = copy_to_reg (op3);
35815 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35817 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35819 error ("the last argument must be scale 1, 2, 4, 8");
35820 return const0_rtx;
35823 /* Optimize. If mask is known to have all high bits set,
35824 replace op0 with pc_rtx to signal that the instruction
35825 overwrites the whole destination and doesn't use its
35826 previous contents. */
35827 if (optimize)
35829 if (TREE_CODE (arg3) == INTEGER_CST)
35831 if (integer_all_onesp (arg3))
35832 op0 = pc_rtx;
35834 else if (TREE_CODE (arg3) == VECTOR_CST)
35836 unsigned int negative = 0;
35837 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35839 tree cst = VECTOR_CST_ELT (arg3, i);
35840 if (TREE_CODE (cst) == INTEGER_CST
35841 && tree_int_cst_sign_bit (cst))
35842 negative++;
35843 else if (TREE_CODE (cst) == REAL_CST
35844 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35845 negative++;
35847 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35848 op0 = pc_rtx;
35850 else if (TREE_CODE (arg3) == SSA_NAME
35851 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35853 /* Recognize also when mask is like:
35854 __v2df src = _mm_setzero_pd ();
35855 __v2df mask = _mm_cmpeq_pd (src, src);
35857 __v8sf src = _mm256_setzero_ps ();
35858 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35859 as that is a cheaper way to load all ones into
35860 a register than having to load a constant from
35861 memory. */
35862 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35863 if (is_gimple_call (def_stmt))
35865 tree fndecl = gimple_call_fndecl (def_stmt);
35866 if (fndecl
35867 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35868 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35870 case IX86_BUILTIN_CMPPD:
35871 case IX86_BUILTIN_CMPPS:
35872 case IX86_BUILTIN_CMPPD256:
35873 case IX86_BUILTIN_CMPPS256:
35874 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35875 break;
35876 /* FALLTHRU */
35877 case IX86_BUILTIN_CMPEQPD:
35878 case IX86_BUILTIN_CMPEQPS:
35879 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
35880 && initializer_zerop (gimple_call_arg (def_stmt,
35881 1)))
35882 op0 = pc_rtx;
35883 break;
35884 default:
35885 break;
35891 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
35892 if (! pat)
35893 return const0_rtx;
35894 emit_insn (pat);
35896 switch (fcode)
35898 case IX86_BUILTIN_GATHER3DIV16SF:
35899 if (target == NULL_RTX)
35900 target = gen_reg_rtx (V8SFmode);
35901 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
35902 break;
35903 case IX86_BUILTIN_GATHER3DIV16SI:
35904 if (target == NULL_RTX)
35905 target = gen_reg_rtx (V8SImode);
35906 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
35907 break;
35908 case IX86_BUILTIN_GATHERDIV8SF:
35909 if (target == NULL_RTX)
35910 target = gen_reg_rtx (V4SFmode);
35911 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
35912 break;
35913 case IX86_BUILTIN_GATHERDIV8SI:
35914 if (target == NULL_RTX)
35915 target = gen_reg_rtx (V4SImode);
35916 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
35917 break;
35918 default:
35919 target = subtarget;
35920 break;
35922 return target;
35924 scatter_gen:
35925 arg0 = CALL_EXPR_ARG (exp, 0);
35926 arg1 = CALL_EXPR_ARG (exp, 1);
35927 arg2 = CALL_EXPR_ARG (exp, 2);
35928 arg3 = CALL_EXPR_ARG (exp, 3);
35929 arg4 = CALL_EXPR_ARG (exp, 4);
35930 op0 = expand_normal (arg0);
35931 op1 = expand_normal (arg1);
35932 op2 = expand_normal (arg2);
35933 op3 = expand_normal (arg3);
35934 op4 = expand_normal (arg4);
35935 mode1 = insn_data[icode].operand[1].mode;
35936 mode2 = insn_data[icode].operand[2].mode;
35937 mode3 = insn_data[icode].operand[3].mode;
35938 mode4 = insn_data[icode].operand[4].mode;
35940 /* Force memory operand only with base register here. But we
35941 don't want to do it on memory operand for other builtin
35942 functions. */
35943 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
35945 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35946 op0 = copy_to_mode_reg (Pmode, op0);
35948 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
35950 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35951 op1 = copy_to_mode_reg (mode1, op1);
35953 else
35955 op1 = copy_to_reg (op1);
35956 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
35959 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35960 op2 = copy_to_mode_reg (mode2, op2);
35962 if (!insn_data[icode].operand[3].predicate (op3, mode3))
35963 op3 = copy_to_mode_reg (mode3, op3);
35965 if (!insn_data[icode].operand[4].predicate (op4, mode4))
35967 error ("the last argument must be scale 1, 2, 4, 8");
35968 return const0_rtx;
35971 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
35972 if (! pat)
35973 return const0_rtx;
35975 emit_insn (pat);
35976 return 0;
35978 vec_prefetch_gen:
35979 arg0 = CALL_EXPR_ARG (exp, 0);
35980 arg1 = CALL_EXPR_ARG (exp, 1);
35981 arg2 = CALL_EXPR_ARG (exp, 2);
35982 arg3 = CALL_EXPR_ARG (exp, 3);
35983 arg4 = CALL_EXPR_ARG (exp, 4);
35984 op0 = expand_normal (arg0);
35985 op1 = expand_normal (arg1);
35986 op2 = expand_normal (arg2);
35987 op3 = expand_normal (arg3);
35988 op4 = expand_normal (arg4);
35989 mode0 = insn_data[icode].operand[0].mode;
35990 mode1 = insn_data[icode].operand[1].mode;
35991 mode3 = insn_data[icode].operand[3].mode;
35992 mode4 = insn_data[icode].operand[4].mode;
35994 if (GET_MODE (op0) == mode0
35995 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
35997 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35998 op0 = copy_to_mode_reg (mode0, op0);
36000 else if (op0 != constm1_rtx)
36002 op0 = copy_to_reg (op0);
36003 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36006 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36007 op1 = copy_to_mode_reg (mode1, op1);
36009 /* Force the memory operand to use only a base register here. We
36010 don't want to do this for the memory operands of other builtin
36011 functions. */
36012 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36014 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36015 op2 = copy_to_mode_reg (Pmode, op2);
36017 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36019 error ("the fourth argument must be scale 1, 2, 4, 8");
36020 return const0_rtx;
36023 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36025 error ("incorrect hint operand");
36026 return const0_rtx;
36029 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36030 if (! pat)
36031 return const0_rtx;
36033 emit_insn (pat);
36035 return 0;
36037 case IX86_BUILTIN_XABORT:
36038 icode = CODE_FOR_xabort;
36039 arg0 = CALL_EXPR_ARG (exp, 0);
36040 op0 = expand_normal (arg0);
36041 mode0 = insn_data[icode].operand[0].mode;
36042 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36044 error ("the xabort's argument must be an 8-bit immediate");
36045 return const0_rtx;
36047 emit_insn (gen_xabort (op0));
36048 return 0;
36050 default:
36051 break;
36054 for (i = 0, d = bdesc_special_args;
36055 i < ARRAY_SIZE (bdesc_special_args);
36056 i++, d++)
36057 if (d->code == fcode)
36058 return ix86_expand_special_args_builtin (d, exp, target);
36060 for (i = 0, d = bdesc_args;
36061 i < ARRAY_SIZE (bdesc_args);
36062 i++, d++)
36063 if (d->code == fcode)
36064 switch (fcode)
36066 case IX86_BUILTIN_FABSQ:
36067 case IX86_BUILTIN_COPYSIGNQ:
36068 if (!TARGET_SSE)
36069 /* Emit a normal call if SSE isn't available. */
36070 return expand_call (exp, target, ignore);
36071 default:
36072 return ix86_expand_args_builtin (d, exp, target);
36075 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36076 if (d->code == fcode)
36077 return ix86_expand_sse_comi (d, exp, target);
36079 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36080 if (d->code == fcode)
36081 return ix86_expand_round_builtin (d, exp, target);
36083 for (i = 0, d = bdesc_pcmpestr;
36084 i < ARRAY_SIZE (bdesc_pcmpestr);
36085 i++, d++)
36086 if (d->code == fcode)
36087 return ix86_expand_sse_pcmpestr (d, exp, target);
36089 for (i = 0, d = bdesc_pcmpistr;
36090 i < ARRAY_SIZE (bdesc_pcmpistr);
36091 i++, d++)
36092 if (d->code == fcode)
36093 return ix86_expand_sse_pcmpistr (d, exp, target);
36095 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36096 if (d->code == fcode)
36097 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36098 (enum ix86_builtin_func_type)
36099 d->flag, d->comparison);
36101 gcc_unreachable ();
36104 /* This returns the target-specific builtin with code CODE if
36105 current_function_decl has visibility on this builtin, which is checked
36106 using isa flags. Returns NULL_TREE otherwise. */
36108 static tree ix86_get_builtin (enum ix86_builtins code)
36110 struct cl_target_option *opts;
36111 tree target_tree = NULL_TREE;
36113 /* Determine the isa flags of current_function_decl. */
36115 if (current_function_decl)
36116 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36118 if (target_tree == NULL)
36119 target_tree = target_option_default_node;
36121 opts = TREE_TARGET_OPTION (target_tree);
36123 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36124 return ix86_builtin_decl (code, true);
36125 else
36126 return NULL_TREE;
36129 /* Returns a function decl for a vectorized version of the builtin function
36130 with builtin function code FN and the result vector type TYPE, or NULL_TREE
36131 if it is not available. */
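/* For example, a two-lane DFmode sqrt maps BUILT_IN_SQRT to
   IX86_BUILTIN_SQRTPD below; the 4- and 8-lane variants map to the
   256-bit and 512-bit forms.  */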
36133 static tree
36134 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36135 tree type_in)
36137 enum machine_mode in_mode, out_mode;
36138 int in_n, out_n;
36139 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36141 if (TREE_CODE (type_out) != VECTOR_TYPE
36142 || TREE_CODE (type_in) != VECTOR_TYPE
36143 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36144 return NULL_TREE;
36146 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36147 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36148 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36149 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36151 switch (fn)
36153 case BUILT_IN_SQRT:
36154 if (out_mode == DFmode && in_mode == DFmode)
36156 if (out_n == 2 && in_n == 2)
36157 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36158 else if (out_n == 4 && in_n == 4)
36159 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36160 else if (out_n == 8 && in_n == 8)
36161 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36163 break;
36165 case BUILT_IN_EXP2F:
36166 if (out_mode == SFmode && in_mode == SFmode)
36168 if (out_n == 16 && in_n == 16)
36169 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36171 break;
36173 case BUILT_IN_SQRTF:
36174 if (out_mode == SFmode && in_mode == SFmode)
36176 if (out_n == 4 && in_n == 4)
36177 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36178 else if (out_n == 8 && in_n == 8)
36179 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36180 else if (out_n == 16 && in_n == 16)
36181 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36183 break;
36185 case BUILT_IN_IFLOOR:
36186 case BUILT_IN_LFLOOR:
36187 case BUILT_IN_LLFLOOR:
36188 /* The round insn does not trap on denormals. */
36189 if (flag_trapping_math || !TARGET_ROUND)
36190 break;
36192 if (out_mode == SImode && in_mode == DFmode)
36194 if (out_n == 4 && in_n == 2)
36195 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36196 else if (out_n == 8 && in_n == 4)
36197 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36198 else if (out_n == 16 && in_n == 8)
36199 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36201 break;
36203 case BUILT_IN_IFLOORF:
36204 case BUILT_IN_LFLOORF:
36205 case BUILT_IN_LLFLOORF:
36206 /* The round insn does not trap on denormals. */
36207 if (flag_trapping_math || !TARGET_ROUND)
36208 break;
36210 if (out_mode == SImode && in_mode == SFmode)
36212 if (out_n == 4 && in_n == 4)
36213 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36214 else if (out_n == 8 && in_n == 8)
36215 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36217 break;
36219 case BUILT_IN_ICEIL:
36220 case BUILT_IN_LCEIL:
36221 case BUILT_IN_LLCEIL:
36222 /* The round insn does not trap on denormals. */
36223 if (flag_trapping_math || !TARGET_ROUND)
36224 break;
36226 if (out_mode == SImode && in_mode == DFmode)
36228 if (out_n == 4 && in_n == 2)
36229 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36230 else if (out_n == 8 && in_n == 4)
36231 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36232 else if (out_n == 16 && in_n == 8)
36233 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36235 break;
36237 case BUILT_IN_ICEILF:
36238 case BUILT_IN_LCEILF:
36239 case BUILT_IN_LLCEILF:
36240 /* The round insn does not trap on denormals. */
36241 if (flag_trapping_math || !TARGET_ROUND)
36242 break;
36244 if (out_mode == SImode && in_mode == SFmode)
36246 if (out_n == 4 && in_n == 4)
36247 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36248 else if (out_n == 8 && in_n == 8)
36249 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36251 break;
36253 case BUILT_IN_IRINT:
36254 case BUILT_IN_LRINT:
36255 case BUILT_IN_LLRINT:
36256 if (out_mode == SImode && in_mode == DFmode)
36258 if (out_n == 4 && in_n == 2)
36259 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36260 else if (out_n == 8 && in_n == 4)
36261 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36263 break;
36265 case BUILT_IN_IRINTF:
36266 case BUILT_IN_LRINTF:
36267 case BUILT_IN_LLRINTF:
36268 if (out_mode == SImode && in_mode == SFmode)
36270 if (out_n == 4 && in_n == 4)
36271 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36272 else if (out_n == 8 && in_n == 8)
36273 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36275 break;
36277 case BUILT_IN_IROUND:
36278 case BUILT_IN_LROUND:
36279 case BUILT_IN_LLROUND:
36280 /* The round insn does not trap on denormals. */
36281 if (flag_trapping_math || !TARGET_ROUND)
36282 break;
36284 if (out_mode == SImode && in_mode == DFmode)
36286 if (out_n == 4 && in_n == 2)
36287 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36288 else if (out_n == 8 && in_n == 4)
36289 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36290 else if (out_n == 16 && in_n == 8)
36291 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36293 break;
36295 case BUILT_IN_IROUNDF:
36296 case BUILT_IN_LROUNDF:
36297 case BUILT_IN_LLROUNDF:
36298 /* The round insn does not trap on denormals. */
36299 if (flag_trapping_math || !TARGET_ROUND)
36300 break;
36302 if (out_mode == SImode && in_mode == SFmode)
36304 if (out_n == 4 && in_n == 4)
36305 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36306 else if (out_n == 8 && in_n == 8)
36307 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36309 break;
36311 case BUILT_IN_COPYSIGN:
36312 if (out_mode == DFmode && in_mode == DFmode)
36314 if (out_n == 2 && in_n == 2)
36315 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36316 else if (out_n == 4 && in_n == 4)
36317 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36318 else if (out_n == 8 && in_n == 8)
36319 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36321 break;
36323 case BUILT_IN_COPYSIGNF:
36324 if (out_mode == SFmode && in_mode == SFmode)
36326 if (out_n == 4 && in_n == 4)
36327 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36328 else if (out_n == 8 && in_n == 8)
36329 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36330 else if (out_n == 16 && in_n == 16)
36331 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36333 break;
36335 case BUILT_IN_FLOOR:
36336 /* The round insn does not trap on denormals. */
36337 if (flag_trapping_math || !TARGET_ROUND)
36338 break;
36340 if (out_mode == DFmode && in_mode == DFmode)
36342 if (out_n == 2 && in_n == 2)
36343 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36344 else if (out_n == 4 && in_n == 4)
36345 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36347 break;
36349 case BUILT_IN_FLOORF:
36350 /* The round insn does not trap on denormals. */
36351 if (flag_trapping_math || !TARGET_ROUND)
36352 break;
36354 if (out_mode == SFmode && in_mode == SFmode)
36356 if (out_n == 4 && in_n == 4)
36357 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36358 else if (out_n == 8 && in_n == 8)
36359 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36361 break;
36363 case BUILT_IN_CEIL:
36364 /* The round insn does not trap on denormals. */
36365 if (flag_trapping_math || !TARGET_ROUND)
36366 break;
36368 if (out_mode == DFmode && in_mode == DFmode)
36370 if (out_n == 2 && in_n == 2)
36371 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36372 else if (out_n == 4 && in_n == 4)
36373 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36375 break;
36377 case BUILT_IN_CEILF:
36378 /* The round insn does not trap on denormals. */
36379 if (flag_trapping_math || !TARGET_ROUND)
36380 break;
36382 if (out_mode == SFmode && in_mode == SFmode)
36384 if (out_n == 4 && in_n == 4)
36385 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36386 else if (out_n == 8 && in_n == 8)
36387 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36389 break;
36391 case BUILT_IN_TRUNC:
36392 /* The round insn does not trap on denormals. */
36393 if (flag_trapping_math || !TARGET_ROUND)
36394 break;
36396 if (out_mode == DFmode && in_mode == DFmode)
36398 if (out_n == 2 && in_n == 2)
36399 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36400 else if (out_n == 4 && in_n == 4)
36401 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36403 break;
36405 case BUILT_IN_TRUNCF:
36406 /* The round insn does not trap on denormals. */
36407 if (flag_trapping_math || !TARGET_ROUND)
36408 break;
36410 if (out_mode == SFmode && in_mode == SFmode)
36412 if (out_n == 4 && in_n == 4)
36413 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36414 else if (out_n == 8 && in_n == 8)
36415 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36417 break;
36419 case BUILT_IN_RINT:
36420 /* The round insn does not trap on denormals. */
36421 if (flag_trapping_math || !TARGET_ROUND)
36422 break;
36424 if (out_mode == DFmode && in_mode == DFmode)
36426 if (out_n == 2 && in_n == 2)
36427 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36428 else if (out_n == 4 && in_n == 4)
36429 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36431 break;
36433 case BUILT_IN_RINTF:
36434 /* The round insn does not trap on denormals. */
36435 if (flag_trapping_math || !TARGET_ROUND)
36436 break;
36438 if (out_mode == SFmode && in_mode == SFmode)
36440 if (out_n == 4 && in_n == 4)
36441 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36442 else if (out_n == 8 && in_n == 8)
36443 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36445 break;
36447 case BUILT_IN_ROUND:
36448 /* The round insn does not trap on denormals. */
36449 if (flag_trapping_math || !TARGET_ROUND)
36450 break;
36452 if (out_mode == DFmode && in_mode == DFmode)
36454 if (out_n == 2 && in_n == 2)
36455 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36456 else if (out_n == 4 && in_n == 4)
36457 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36459 break;
36461 case BUILT_IN_ROUNDF:
36462 /* The round insn does not trap on denormals. */
36463 if (flag_trapping_math || !TARGET_ROUND)
36464 break;
36466 if (out_mode == SFmode && in_mode == SFmode)
36468 if (out_n == 4 && in_n == 4)
36469 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36470 else if (out_n == 8 && in_n == 8)
36471 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36473 break;
36475 case BUILT_IN_FMA:
36476 if (out_mode == DFmode && in_mode == DFmode)
36478 if (out_n == 2 && in_n == 2)
36479 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36480 if (out_n == 4 && in_n == 4)
36481 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36483 break;
36485 case BUILT_IN_FMAF:
36486 if (out_mode == SFmode && in_mode == SFmode)
36488 if (out_n == 4 && in_n == 4)
36489 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36490 if (out_n == 8 && in_n == 8)
36491 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36493 break;
36495 default:
36496 break;
36499 /* Dispatch to a handler for a vectorization library. */
36500 if (ix86_veclib_handler)
36501 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36502 type_in);
36504 return NULL_TREE;
36507 /* Handler for an SVML-style interface to
36508 a library with vectorized intrinsics. */
36510 static tree
36511 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36513 char name[20];
36514 tree fntype, new_fndecl, args;
36515 unsigned arity;
36516 const char *bname;
36517 enum machine_mode el_mode, in_mode;
36518 int n, in_n;
36520 /* The SVML is suitable for unsafe math only. */
36521 if (!flag_unsafe_math_optimizations)
36522 return NULL_TREE;
36524 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36525 n = TYPE_VECTOR_SUBPARTS (type_out);
36526 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36527 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36528 if (el_mode != in_mode
36529 || n != in_n)
36530 return NULL_TREE;
36532 switch (fn)
36534 case BUILT_IN_EXP:
36535 case BUILT_IN_LOG:
36536 case BUILT_IN_LOG10:
36537 case BUILT_IN_POW:
36538 case BUILT_IN_TANH:
36539 case BUILT_IN_TAN:
36540 case BUILT_IN_ATAN:
36541 case BUILT_IN_ATAN2:
36542 case BUILT_IN_ATANH:
36543 case BUILT_IN_CBRT:
36544 case BUILT_IN_SINH:
36545 case BUILT_IN_SIN:
36546 case BUILT_IN_ASINH:
36547 case BUILT_IN_ASIN:
36548 case BUILT_IN_COSH:
36549 case BUILT_IN_COS:
36550 case BUILT_IN_ACOSH:
36551 case BUILT_IN_ACOS:
36552 if (el_mode != DFmode || n != 2)
36553 return NULL_TREE;
36554 break;
36556 case BUILT_IN_EXPF:
36557 case BUILT_IN_LOGF:
36558 case BUILT_IN_LOG10F:
36559 case BUILT_IN_POWF:
36560 case BUILT_IN_TANHF:
36561 case BUILT_IN_TANF:
36562 case BUILT_IN_ATANF:
36563 case BUILT_IN_ATAN2F:
36564 case BUILT_IN_ATANHF:
36565 case BUILT_IN_CBRTF:
36566 case BUILT_IN_SINHF:
36567 case BUILT_IN_SINF:
36568 case BUILT_IN_ASINHF:
36569 case BUILT_IN_ASINF:
36570 case BUILT_IN_COSHF:
36571 case BUILT_IN_COSF:
36572 case BUILT_IN_ACOSHF:
36573 case BUILT_IN_ACOSF:
36574 if (el_mode != SFmode || n != 4)
36575 return NULL_TREE;
36576 break;
36578 default:
36579 return NULL_TREE;
36582 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36584 if (fn == BUILT_IN_LOGF)
36585 strcpy (name, "vmlsLn4");
36586 else if (fn == BUILT_IN_LOG)
36587 strcpy (name, "vmldLn2");
36588 else if (n == 4)
36590 sprintf (name, "vmls%s", bname+10);
36591 name[strlen (name)-1] = '4';
36593 else
36594 sprintf (name, "vmld%s2", bname+10);
36596 /* Convert to uppercase. */
36597 name[4] &= ~0x20;
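/* For example, BUILT_IN_SINF ("__builtin_sinf") becomes "vmlsSin4"
   and BUILT_IN_SIN ("__builtin_sin") becomes "vmldSin2".  */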
36599 arity = 0;
36600 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36601 args;
36602 args = TREE_CHAIN (args))
36603 arity++;
36605 if (arity == 1)
36606 fntype = build_function_type_list (type_out, type_in, NULL);
36607 else
36608 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36610 /* Build a function declaration for the vectorized function. */
36611 new_fndecl = build_decl (BUILTINS_LOCATION,
36612 FUNCTION_DECL, get_identifier (name), fntype);
36613 TREE_PUBLIC (new_fndecl) = 1;
36614 DECL_EXTERNAL (new_fndecl) = 1;
36615 DECL_IS_NOVOPS (new_fndecl) = 1;
36616 TREE_READONLY (new_fndecl) = 1;
36618 return new_fndecl;
36621 /* Handler for an ACML-style interface to
36622 a library with vectorized intrinsics. */
36624 static tree
36625 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36627 char name[20] = "__vr.._";
36628 tree fntype, new_fndecl, args;
36629 unsigned arity;
36630 const char *bname;
36631 enum machine_mode el_mode, in_mode;
36632 int n, in_n;
36634 /* The ACML is 64-bit only and suitable for unsafe math only, as
36635 it does not correctly support parts of IEEE (such as denormals)
36636 with the required precision. */
36637 if (!TARGET_64BIT
36638 || !flag_unsafe_math_optimizations)
36639 return NULL_TREE;
36641 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36642 n = TYPE_VECTOR_SUBPARTS (type_out);
36643 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36644 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36645 if (el_mode != in_mode
36646 || n != in_n)
36647 return NULL_TREE;
36649 switch (fn)
36651 case BUILT_IN_SIN:
36652 case BUILT_IN_COS:
36653 case BUILT_IN_EXP:
36654 case BUILT_IN_LOG:
36655 case BUILT_IN_LOG2:
36656 case BUILT_IN_LOG10:
36657 name[4] = 'd';
36658 name[5] = '2';
36659 if (el_mode != DFmode
36660 || n != 2)
36661 return NULL_TREE;
36662 break;
36664 case BUILT_IN_SINF:
36665 case BUILT_IN_COSF:
36666 case BUILT_IN_EXPF:
36667 case BUILT_IN_POWF:
36668 case BUILT_IN_LOGF:
36669 case BUILT_IN_LOG2F:
36670 case BUILT_IN_LOG10F:
36671 name[4] = 's';
36672 name[5] = '4';
36673 if (el_mode != SFmode
36674 || n != 4)
36675 return NULL_TREE;
36676 break;
36678 default:
36679 return NULL_TREE;
36682 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36683 sprintf (name + 7, "%s", bname+10);
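/* For example, BUILT_IN_SIN yields "__vrd2_sin" and BUILT_IN_SINF
   yields "__vrs4_sinf".  */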
36685 arity = 0;
36686 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36687 args;
36688 args = TREE_CHAIN (args))
36689 arity++;
36691 if (arity == 1)
36692 fntype = build_function_type_list (type_out, type_in, NULL);
36693 else
36694 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36696 /* Build a function declaration for the vectorized function. */
36697 new_fndecl = build_decl (BUILTINS_LOCATION,
36698 FUNCTION_DECL, get_identifier (name), fntype);
36699 TREE_PUBLIC (new_fndecl) = 1;
36700 DECL_EXTERNAL (new_fndecl) = 1;
36701 DECL_IS_NOVOPS (new_fndecl) = 1;
36702 TREE_READONLY (new_fndecl) = 1;
36704 return new_fndecl;
36707 /* Returns a decl of a function that implements gather load with
36708 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
36709 Return NULL_TREE if it is not available. */
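/* For example, a V2DF gather with a 32-bit index vector maps to
   IX86_BUILTIN_GATHERSIV2DF below, while a 64-bit index vector maps to
   IX86_BUILTIN_GATHERDIV2DF.  */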
36711 static tree
36712 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36713 const_tree index_type, int scale)
36715 bool si;
36716 enum ix86_builtins code;
36718 if (! TARGET_AVX2)
36719 return NULL_TREE;
36721 if ((TREE_CODE (index_type) != INTEGER_TYPE
36722 && !POINTER_TYPE_P (index_type))
36723 || (TYPE_MODE (index_type) != SImode
36724 && TYPE_MODE (index_type) != DImode))
36725 return NULL_TREE;
36727 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36728 return NULL_TREE;
36730 /* v*gather* insn sign extends index to pointer mode. */
36731 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36732 && TYPE_UNSIGNED (index_type))
36733 return NULL_TREE;
36735 if (scale <= 0
36736 || scale > 8
36737 || (scale & (scale - 1)) != 0)
36738 return NULL_TREE;
36740 si = TYPE_MODE (index_type) == SImode;
36741 switch (TYPE_MODE (mem_vectype))
36743 case V2DFmode:
36744 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36745 break;
36746 case V4DFmode:
36747 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36748 break;
36749 case V2DImode:
36750 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36751 break;
36752 case V4DImode:
36753 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36754 break;
36755 case V4SFmode:
36756 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36757 break;
36758 case V8SFmode:
36759 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36760 break;
36761 case V4SImode:
36762 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36763 break;
36764 case V8SImode:
36765 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36766 break;
36767 case V8DFmode:
36768 if (TARGET_AVX512F)
36769 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36770 else
36771 return NULL_TREE;
36772 break;
36773 case V8DImode:
36774 if (TARGET_AVX512F)
36775 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36776 else
36777 return NULL_TREE;
36778 break;
36779 case V16SFmode:
36780 if (TARGET_AVX512F)
36781 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36782 else
36783 return NULL_TREE;
36784 break;
36785 case V16SImode:
36786 if (TARGET_AVX512F)
36787 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36788 else
36789 return NULL_TREE;
36790 break;
36791 default:
36792 return NULL_TREE;
36795 return ix86_get_builtin (code);
36798 /* Returns a decl of a target-specific builtin that implements
36799 reciprocal of the function, or NULL_TREE if not available. */
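/* For example, the reciprocal of BUILT_IN_SQRTF is provided by
   IX86_BUILTIN_RSQRTF below, and the vectorized IX86_BUILTIN_SQRTPS_NR
   forms map to their RSQRTPS counterparts.  */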
36801 static tree
36802 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
36803 bool sqrt ATTRIBUTE_UNUSED)
36805 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36806 && flag_finite_math_only && !flag_trapping_math
36807 && flag_unsafe_math_optimizations))
36808 return NULL_TREE;
36810 if (md_fn)
36811 /* Machine dependent builtins. */
36812 switch (fn)
36814 /* Vectorized version of sqrt to rsqrt conversion. */
36815 case IX86_BUILTIN_SQRTPS_NR:
36816 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36818 case IX86_BUILTIN_SQRTPS_NR256:
36819 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36821 default:
36822 return NULL_TREE;
36824 else
36825 /* Normal builtins. */
36826 switch (fn)
36828 /* Sqrt to rsqrt conversion. */
36829 case BUILT_IN_SQRTF:
36830 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36832 default:
36833 return NULL_TREE;
36837 /* Helper for avx_vpermilps256_operand et al. This is also used by
36838 the expansion functions to turn the parallel back into a mask.
36839 The return value is 0 for no match and the imm8+1 for a match. */
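/* For example, a V4SFmode parallel [0 3 2 1] encodes as
   0 | 3<<2 | 2<<4 | 1<<6 = 0x6c, so the return value is 0x6d.  */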
36841 int
36842 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36844 unsigned i, nelt = GET_MODE_NUNITS (mode);
36845 unsigned mask = 0;
36846 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36848 if (XVECLEN (par, 0) != (int) nelt)
36849 return 0;
36851 /* Validate that all of the elements are constants, and not totally
36852 out of range. Copy the data into an integral array to make the
36853 subsequent checks easier. */
36854 for (i = 0; i < nelt; ++i)
36856 rtx er = XVECEXP (par, 0, i);
36857 unsigned HOST_WIDE_INT ei;
36859 if (!CONST_INT_P (er))
36860 return 0;
36861 ei = INTVAL (er);
36862 if (ei >= nelt)
36863 return 0;
36864 ipar[i] = ei;
36867 switch (mode)
36869 case V8DFmode:
36870 /* In the 512-bit DFmode case, we can only move elements within
36871 a 128-bit lane. First fill the second part of the mask,
36872 then fallthru. */
36873 for (i = 4; i < 6; ++i)
36875 if (ipar[i] < 4 || ipar[i] >= 6)
36876 return 0;
36877 mask |= (ipar[i] - 4) << i;
36879 for (i = 6; i < 8; ++i)
36881 if (ipar[i] < 6)
36882 return 0;
36883 mask |= (ipar[i] - 6) << i;
36885 /* FALLTHRU */
36887 case V4DFmode:
36888 /* In the 256-bit DFmode case, we can only move elements within
36889 a 128-bit lane. */
36890 for (i = 0; i < 2; ++i)
36892 if (ipar[i] >= 2)
36893 return 0;
36894 mask |= ipar[i] << i;
36896 for (i = 2; i < 4; ++i)
36898 if (ipar[i] < 2)
36899 return 0;
36900 mask |= (ipar[i] - 2) << i;
36902 break;
36904 case V16SFmode:
36905 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
36906 must mirror the permutation in the lower 256 bits. */
36907 for (i = 0; i < 8; ++i)
36908 if (ipar[i] + 8 != ipar[i + 8])
36909 return 0;
36910 /* FALLTHRU */
36912 case V8SFmode:
36913 /* In the 256-bit SFmode case, we have full freedom of
36914 movement within the low 128-bit lane, but the high 128-bit
36915 lane must mirror the exact same pattern. */
36916 for (i = 0; i < 4; ++i)
36917 if (ipar[i] + 4 != ipar[i + 4])
36918 return 0;
36919 nelt = 4;
36920 /* FALLTHRU */
36922 case V2DFmode:
36923 case V4SFmode:
36924 /* In the 128-bit case, we've full freedom in the placement of
36925 the elements from the source operand. */
36926 for (i = 0; i < nelt; ++i)
36927 mask |= ipar[i] << (i * (nelt / 2));
36928 break;
36930 default:
36931 gcc_unreachable ();
36934 /* Make sure success has a non-zero value by adding one. */
36935 return mask + 1;
36938 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
36939 the expansion functions to turn the parallel back into a mask.
36940 The return value is 0 for no match and the imm8+1 for a match. */
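/* For example, a V4DFmode parallel [2 3 4 5] (the high lane of the
   first operand followed by the low lane of the second) encodes as
   (2/2) << 0 | (4/2) << 4 = 0x21, so the return value is 0x22.  */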
36942 int
36943 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
36945 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
36946 unsigned mask = 0;
36947 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
36949 if (XVECLEN (par, 0) != (int) nelt)
36950 return 0;
36952 /* Validate that all of the elements are constants, and not totally
36953 out of range. Copy the data into an integral array to make the
36954 subsequent checks easier. */
36955 for (i = 0; i < nelt; ++i)
36957 rtx er = XVECEXP (par, 0, i);
36958 unsigned HOST_WIDE_INT ei;
36960 if (!CONST_INT_P (er))
36961 return 0;
36962 ei = INTVAL (er);
36963 if (ei >= 2 * nelt)
36964 return 0;
36965 ipar[i] = ei;
36968 /* Validate that each half of the permute selects consecutive elements. */
36969 for (i = 0; i < nelt2 - 1; ++i)
36970 if (ipar[i] + 1 != ipar[i + 1])
36971 return 0;
36972 for (i = nelt2; i < nelt - 1; ++i)
36973 if (ipar[i] + 1 != ipar[i + 1])
36974 return 0;
36976 /* Reconstruct the mask. */
36977 for (i = 0; i < 2; ++i)
36979 unsigned e = ipar[i * nelt2];
36980 if (e % nelt2)
36981 return 0;
36982 e /= nelt2;
36983 mask |= e << (i * 4);
36986 /* Make sure success has a non-zero value by adding one. */
36987 return mask + 1;
36990 /* Store OPERAND to the memory after reload is completed. This means
36991 that we can't easily use assign_stack_local. */
36992 rtx
36993 ix86_force_to_memory (enum machine_mode mode, rtx operand)
36995 rtx result;
36997 gcc_assert (reload_completed);
36998 if (ix86_using_red_zone ())
37000 result = gen_rtx_MEM (mode,
37001 gen_rtx_PLUS (Pmode,
37002 stack_pointer_rtx,
37003 GEN_INT (-RED_ZONE_SIZE)));
37004 emit_move_insn (result, operand);
37006 else if (TARGET_64BIT)
37008 switch (mode)
37010 case HImode:
37011 case SImode:
37012 operand = gen_lowpart (DImode, operand);
37013 /* FALLTHRU */
37014 case DImode:
37015 emit_insn (
37016 gen_rtx_SET (VOIDmode,
37017 gen_rtx_MEM (DImode,
37018 gen_rtx_PRE_DEC (DImode,
37019 stack_pointer_rtx)),
37020 operand));
37021 break;
37022 default:
37023 gcc_unreachable ();
37025 result = gen_rtx_MEM (mode, stack_pointer_rtx);
37027 else
37029 switch (mode)
37031 case DImode:
37033 rtx operands[2];
37034 split_double_mode (mode, &operand, 1, operands, operands + 1);
37035 emit_insn (
37036 gen_rtx_SET (VOIDmode,
37037 gen_rtx_MEM (SImode,
37038 gen_rtx_PRE_DEC (Pmode,
37039 stack_pointer_rtx)),
37040 operands[1]));
37041 emit_insn (
37042 gen_rtx_SET (VOIDmode,
37043 gen_rtx_MEM (SImode,
37044 gen_rtx_PRE_DEC (Pmode,
37045 stack_pointer_rtx)),
37046 operands[0]));
37048 break;
37049 case HImode:
37050 /* Store HImodes as SImodes. */
37051 operand = gen_lowpart (SImode, operand);
37052 /* FALLTHRU */
37053 case SImode:
37054 emit_insn (
37055 gen_rtx_SET (VOIDmode,
37056 gen_rtx_MEM (GET_MODE (operand),
37057 gen_rtx_PRE_DEC (SImode,
37058 stack_pointer_rtx)),
37059 operand));
37060 break;
37061 default:
37062 gcc_unreachable ();
37064 result = gen_rtx_MEM (mode, stack_pointer_rtx);
37066 return result;
37069 /* Free operand from the memory. */
37070 void
37071 ix86_free_from_memory (enum machine_mode mode)
37073 if (!ix86_using_red_zone ())
37075 int size;
37077 if (mode == DImode || TARGET_64BIT)
37078 size = 8;
37079 else
37080 size = 4;
37081 /* Use LEA to deallocate stack space. In peephole2 it will be converted
37082 to pop or add instruction if registers are available. */
37083 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
37084 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
37085 GEN_INT (size))));
37089 /* Return a register priority for hard reg REGNO. */
37090 static int
37091 ix86_register_priority (int hard_regno)
37093 /* ebp and r13 as the base always want a displacement, and r12 as the
37094 base always wants an index. So discourage their usage in an
37095 address. */
37096 if (hard_regno == R12_REG || hard_regno == R13_REG)
37097 return 0;
37098 if (hard_regno == BP_REG)
37099 return 1;
37100 /* New x86-64 int registers result in bigger code size. Discourage
37101 them. */
37102 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37103 return 2;
37104 /* New x86-64 SSE registers result in bigger code size. Discourage
37105 them. */
37106 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37107 return 2;
37108 /* Usage of AX register results in smaller code. Prefer it. */
37109 if (hard_regno == 0)
37110 return 4;
37111 return 3;
37114 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37116 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37117 QImode must go into class Q_REGS.
37118 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37119 movdf to do mem-to-mem moves through integer regs. */
37121 static reg_class_t
37122 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37124 enum machine_mode mode = GET_MODE (x);
37126 /* We're only allowed to return a subclass of CLASS. Many of the
37127 following checks fail for NO_REGS, so eliminate that early. */
37128 if (regclass == NO_REGS)
37129 return NO_REGS;
37131 /* All classes can load zeros. */
37132 if (x == CONST0_RTX (mode))
37133 return regclass;
37135 /* Force constants into memory if we are loading a (nonzero) constant into
37136 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37137 instructions to load from a constant. */
37138 if (CONSTANT_P (x)
37139 && (MAYBE_MMX_CLASS_P (regclass)
37140 || MAYBE_SSE_CLASS_P (regclass)
37141 || MAYBE_MASK_CLASS_P (regclass)))
37142 return NO_REGS;
37144 /* Prefer SSE regs only, if we can use them for math. */
37145 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37146 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37148 /* Floating-point constants need more complex checks. */
37149 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37151 /* General regs can load everything. */
37152 if (reg_class_subset_p (regclass, GENERAL_REGS))
37153 return regclass;
37155 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37156 zero above. We only want to wind up preferring 80387 registers if
37157 we plan on doing computation with them. */
37158 if (TARGET_80387
37159 && standard_80387_constant_p (x) > 0)
37161 /* Limit class to non-sse. */
37162 if (regclass == FLOAT_SSE_REGS)
37163 return FLOAT_REGS;
37164 if (regclass == FP_TOP_SSE_REGS)
37165 return FP_TOP_REG;
37166 if (regclass == FP_SECOND_SSE_REGS)
37167 return FP_SECOND_REG;
37168 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37169 return regclass;
37172 return NO_REGS;
37175 /* Generally when we see PLUS here, it's the function invariant
37176 (plus soft-fp const_int). Which can only be computed into general
37177 regs. */
37178 if (GET_CODE (x) == PLUS)
37179 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37181 /* QImode constants are easy to load, but non-constant QImode data
37182 must go into Q_REGS. */
37183 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37185 if (reg_class_subset_p (regclass, Q_REGS))
37186 return regclass;
37187 if (reg_class_subset_p (Q_REGS, regclass))
37188 return Q_REGS;
37189 return NO_REGS;
37192 return regclass;
37195 /* Discourage putting floating-point values in SSE registers unless
37196 SSE math is being used, and likewise for the 387 registers. */
37197 static reg_class_t
37198 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37200 enum machine_mode mode = GET_MODE (x);
37202 /* Restrict the output reload class to the register bank that we are doing
37203 math on. If we would like not to return a subset of CLASS, reject this
37204 alternative: if reload cannot do this, it will still use its choice. */
37205 mode = GET_MODE (x);
37206 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37207 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37209 if (X87_FLOAT_MODE_P (mode))
37211 if (regclass == FP_TOP_SSE_REGS)
37212 return FP_TOP_REG;
37213 else if (regclass == FP_SECOND_SSE_REGS)
37214 return FP_SECOND_REG;
37215 else
37216 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37219 return regclass;
37222 static reg_class_t
37223 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37224 enum machine_mode mode, secondary_reload_info *sri)
37226 /* Double-word spills from general registers to non-offsettable memory
37227 references (zero-extended addresses) require special handling. */
37228 if (TARGET_64BIT
37229 && MEM_P (x)
37230 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37231 && INTEGER_CLASS_P (rclass)
37232 && !offsettable_memref_p (x))
37234 sri->icode = (in_p
37235 ? CODE_FOR_reload_noff_load
37236 : CODE_FOR_reload_noff_store);
37237 /* Add the cost of moving address to a temporary. */
37238 sri->extra_cost = 1;
37240 return NO_REGS;
37243 /* QImode spills from non-QI registers require
37244 intermediate register on 32bit targets. */
37245 if (mode == QImode
37246 && (MAYBE_MASK_CLASS_P (rclass)
37247 || (!TARGET_64BIT && !in_p
37248 && INTEGER_CLASS_P (rclass)
37249 && MAYBE_NON_Q_CLASS_P (rclass))))
37251 int regno;
37253 if (REG_P (x))
37254 regno = REGNO (x);
37255 else
37256 regno = -1;
37258 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37259 regno = true_regnum (x);
37261 /* Return Q_REGS if the operand is in memory. */
37262 if (regno == -1)
37263 return Q_REGS;
37266 /* This condition handles corner case where an expression involving
37267 pointers gets vectorized. We're trying to use the address of a
37268 stack slot as a vector initializer.
37270 (set (reg:V2DI 74 [ vect_cst_.2 ])
37271 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37273 Eventually frame gets turned into sp+offset like this:
37275 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37276 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37277 (const_int 392 [0x188]))))
37279 That later gets turned into:
37281 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37282 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37283 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37285 We'll have the following reload recorded:
37287 Reload 0: reload_in (DI) =
37288 (plus:DI (reg/f:DI 7 sp)
37289 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37290 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37291 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37292 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37293 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37294 reload_reg_rtx: (reg:V2DI 22 xmm1)
37296 Which isn't going to work since SSE instructions can't handle scalar
37297 additions. Returning GENERAL_REGS forces the addition into integer
37298 register and reload can handle subsequent reloads without problems. */
37300 if (in_p && GET_CODE (x) == PLUS
37301 && SSE_CLASS_P (rclass)
37302 && SCALAR_INT_MODE_P (mode))
37303 return GENERAL_REGS;
37305 return NO_REGS;
37308 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37310 static bool
37311 ix86_class_likely_spilled_p (reg_class_t rclass)
37313 switch (rclass)
37315 case AREG:
37316 case DREG:
37317 case CREG:
37318 case BREG:
37319 case AD_REGS:
37320 case SIREG:
37321 case DIREG:
37322 case SSE_FIRST_REG:
37323 case FP_TOP_REG:
37324 case FP_SECOND_REG:
37325 return true;
37327 default:
37328 break;
37331 return false;
37334 /* If we are copying between general and FP registers, we need a memory
37335 location. The same is true for SSE and MMX registers.
37337 To optimize register_move_cost performance, allow inline variant.
37339 The macro can't work reliably when one of the CLASSES is a class containing
37340 registers from multiple units (SSE, MMX, integer). We avoid this by never
37341 combining those units in single alternative in the machine description.
37342 Ensure that this constraint holds to avoid unexpected surprises.
37344 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37345 enforce these sanity checks. */
37347 static inline bool
37348 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37349 enum machine_mode mode, int strict)
37351 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37352 return false;
37353 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37354 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37355 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37356 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37357 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37358 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37360 gcc_assert (!strict || lra_in_progress);
37361 return true;
37364 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37365 return true;
37367 /* ??? This is a lie. We do have moves between mmx/general, and for
37368 mmx/sse2. But by saying we need secondary memory we discourage the
37369 register allocator from using the mmx registers unless needed. */
37370 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37371 return true;
37373 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37375 /* SSE1 doesn't have any direct moves from other classes. */
37376 if (!TARGET_SSE2)
37377 return true;
37379 /* If the target says that inter-unit moves are more expensive
37380 than moving through memory, then don't generate them. */
37381 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37382 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37383 return true;
37385 /* Between SSE and general, we have moves no larger than word size. */
37386 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37387 return true;
37390 return false;
37393 bool
37394 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37395 enum machine_mode mode, int strict)
37397 return inline_secondary_memory_needed (class1, class2, mode, strict);
37400 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37402 On the 80386, this is the size of MODE in words,
37403 except in the FP regs, where a single reg is always enough. */
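/* For example, XFmode needs three GENERAL_REGS words with -m32
   (two with -m64) but only a single x87 register.  */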
37405 static unsigned char
37406 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37408 if (MAYBE_INTEGER_CLASS_P (rclass))
37410 if (mode == XFmode)
37411 return (TARGET_64BIT ? 2 : 3);
37412 else if (mode == XCmode)
37413 return (TARGET_64BIT ? 4 : 6);
37414 else
37415 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37417 else
37419 if (COMPLEX_MODE_P (mode))
37420 return 2;
37421 else
37422 return 1;
37426 /* Return true if the registers in CLASS cannot represent the change from
37427 modes FROM to TO. */
37429 bool
37430 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37431 enum reg_class regclass)
37433 if (from == to)
37434 return false;
37436 /* x87 registers can't do subreg at all, as all values are reformatted
37437 to extended precision. */
37438 if (MAYBE_FLOAT_CLASS_P (regclass))
37439 return true;
37441 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37443 /* Vector registers do not support QI or HImode loads. If we don't
37444 disallow a change to these modes, reload will assume it's ok to
37445 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37446 the vec_dupv4hi pattern. */
37447 if (GET_MODE_SIZE (from) < 4)
37448 return true;
37450 /* Vector registers do not support subreg with nonzero offsets, which
37451 are otherwise valid for integer registers. Since we can't see
37452 whether we have a nonzero offset from here, prohibit all
37453 nonparadoxical subregs changing size. */
37454 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37455 return true;
37458 return false;
37461 /* Return the cost of moving data of mode M between a
37462 register and memory. A value of 2 is the default; this cost is
37463 relative to those in `REGISTER_MOVE_COST'.
37465 This function is used extensively by register_move_cost, which is used to
37466 build tables at startup, so make it inline in this case.
37467 When IN is 2, return maximum of in and out move cost.
37469 If moving between registers and memory is more expensive than
37470 between two registers, you should define this macro to express the
37471 relative cost.
37473 Also model the increased cost of moving QImode values in
37474 non-Q_REGS classes. */
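/* For example, with -m32 a DImode value moved between memory and an
   integer register takes two word-sized moves, so the int_load and
   int_store entries are scaled by two below.  */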
37476 static inline int
37477 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37478 int in)
37480 int cost;
37481 if (FLOAT_CLASS_P (regclass))
37483 int index;
37484 switch (mode)
37486 case SFmode:
37487 index = 0;
37488 break;
37489 case DFmode:
37490 index = 1;
37491 break;
37492 case XFmode:
37493 index = 2;
37494 break;
37495 default:
37496 return 100;
37498 if (in == 2)
37499 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37500 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37502 if (SSE_CLASS_P (regclass))
37504 int index;
37505 switch (GET_MODE_SIZE (mode))
37507 case 4:
37508 index = 0;
37509 break;
37510 case 8:
37511 index = 1;
37512 break;
37513 case 16:
37514 index = 2;
37515 break;
37516 default:
37517 return 100;
37519 if (in == 2)
37520 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37521 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37523 if (MMX_CLASS_P (regclass))
37525 int index;
37526 switch (GET_MODE_SIZE (mode))
37528 case 4:
37529 index = 0;
37530 break;
37531 case 8:
37532 index = 1;
37533 break;
37534 default:
37535 return 100;
37537 if (in == 2)
37538 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37539 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37541 switch (GET_MODE_SIZE (mode))
37543 case 1:
37544 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37546 if (!in)
37547 return ix86_cost->int_store[0];
37548 if (TARGET_PARTIAL_REG_DEPENDENCY
37549 && optimize_function_for_speed_p (cfun))
37550 cost = ix86_cost->movzbl_load;
37551 else
37552 cost = ix86_cost->int_load[0];
37553 if (in == 2)
37554 return MAX (cost, ix86_cost->int_store[0]);
37555 return cost;
37557 else
37559 if (in == 2)
37560 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37561 if (in)
37562 return ix86_cost->movzbl_load;
37563 else
37564 return ix86_cost->int_store[0] + 4;
37566 break;
37567 case 2:
37568 if (in == 2)
37569 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37570 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37571 default:
37572 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37573 if (mode == TFmode)
37574 mode = XFmode;
37575 if (in == 2)
37576 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37577 else if (in)
37578 cost = ix86_cost->int_load[2];
37579 else
37580 cost = ix86_cost->int_store[2];
37581 return (cost * (((int) GET_MODE_SIZE (mode)
37582 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37586 static int
37587 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37588 bool in)
37590 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37594 /* Return the cost of moving data from a register in class CLASS1 to
37595 one in class CLASS2.
37597 It is not required that the cost always equal 2 when FROM is the same as TO;
37598 on some machines it is expensive to move between registers if they are not
37599 general registers. */
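/* For example, when a direct SImode move between an SSE register and a
   general register is possible, it is charged MAX (8, mmxsse_to_integer)
   below to discourage such cross-unit moves.  */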
37601 static int
37602 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37603 reg_class_t class2_i)
37605 enum reg_class class1 = (enum reg_class) class1_i;
37606 enum reg_class class2 = (enum reg_class) class2_i;
37608 /* In case we require secondary memory, compute cost of the store followed
37609 by load. In order to avoid bad register allocation choices, we need
37610 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37612 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37614 int cost = 1;
37616 cost += inline_memory_move_cost (mode, class1, 2);
37617 cost += inline_memory_move_cost (mode, class2, 2);
37619 /* In case of copying from a general purpose register we may emit multiple
37620 stores followed by a single load, causing a memory size mismatch stall.
37621 Count this as an arbitrarily high cost of 20. */
37622 if (targetm.class_max_nregs (class1, mode)
37623 > targetm.class_max_nregs (class2, mode))
37624 cost += 20;
37626 /* In the case of FP/MMX moves, the registers actually overlap, and we
37627 have to switch modes in order to treat them differently. */
37628 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37629 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37630 cost += 20;
37632 return cost;
37635 /* Moves between SSE/MMX and integer unit are expensive. */
37636 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37637 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37639 /* ??? By keeping returned value relatively high, we limit the number
37640 of moves between integer and MMX/SSE registers for all targets.
37641 Additionally, high value prevents problem with x86_modes_tieable_p(),
37642 where integer modes in MMX/SSE registers are not tieable
37643 because of missing QImode and HImode moves to, from or between
37644 MMX/SSE registers. */
37645 return MAX (8, ix86_cost->mmxsse_to_integer);
37647 if (MAYBE_FLOAT_CLASS_P (class1))
37648 return ix86_cost->fp_move;
37649 if (MAYBE_SSE_CLASS_P (class1))
37650 return ix86_cost->sse_move;
37651 if (MAYBE_MMX_CLASS_P (class1))
37652 return ix86_cost->mmx_move;
37653 return 2;
37656 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37657 MODE. */
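/* For example, CCmode values are accepted only in the flags register,
   and 512-bit vector modes only in SSE registers when AVX-512F is
   enabled.  */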
37659 bool
37660 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37662 /* Flags and only flags can only hold CCmode values. */
37663 if (CC_REGNO_P (regno))
37664 return GET_MODE_CLASS (mode) == MODE_CC;
37665 if (GET_MODE_CLASS (mode) == MODE_CC
37666 || GET_MODE_CLASS (mode) == MODE_RANDOM
37667 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37668 return false;
37669 if (STACK_REGNO_P (regno))
37670 return VALID_FP_MODE_P (mode);
37671 if (MASK_REGNO_P (regno))
37672 return VALID_MASK_REG_MODE (mode);
37673 if (SSE_REGNO_P (regno))
37675 /* We implement the move patterns for all vector modes into and
37676 out of SSE registers, even when no operation instructions
37677 are available. */
37679 /* For AVX-512 we allow, regardless of regno:
37680 - XI mode
37681 - any of 512-bit wide vector mode
37682 - any scalar mode. */
37683 if (TARGET_AVX512F
37684 && (mode == XImode
37685 || VALID_AVX512F_REG_MODE (mode)
37686 || VALID_AVX512F_SCALAR_MODE (mode)))
37687 return true;
37689 /* xmm16-xmm31 are only available for AVX-512. */
37690 if (EXT_REX_SSE_REGNO_P (regno))
37691 return false;
37693 /* OImode and AVX modes are available only when AVX is enabled. */
37694 return ((TARGET_AVX
37695 && VALID_AVX256_REG_OR_OI_MODE (mode))
37696 || VALID_SSE_REG_MODE (mode)
37697 || VALID_SSE2_REG_MODE (mode)
37698 || VALID_MMX_REG_MODE (mode)
37699 || VALID_MMX_REG_MODE_3DNOW (mode));
37701 if (MMX_REGNO_P (regno))
37703 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37704 so if the register is available at all, then we can move data of
37705 the given mode into or out of it. */
37706 return (VALID_MMX_REG_MODE (mode)
37707 || VALID_MMX_REG_MODE_3DNOW (mode));
37710 if (mode == QImode)
37712 /* Take care for QImode values - they can be in non-QI regs,
37713 but then they do cause partial register stalls. */
37714 if (ANY_QI_REGNO_P (regno))
37715 return true;
37716 if (!TARGET_PARTIAL_REG_STALL)
37717 return true;
37718 /* LRA checks if the hard register is OK for the given mode.
37719 QImode values can live in non-QI regs, so we allow all
37720 registers here. */
37721 if (lra_in_progress)
37722 return true;
37723 return !can_create_pseudo_p ();
37725 /* We handle both integer and floats in the general purpose registers. */
37726 else if (VALID_INT_MODE_P (mode))
37727 return true;
37728 else if (VALID_FP_MODE_P (mode))
37729 return true;
37730 else if (VALID_DFP_MODE_P (mode))
37731 return true;
37732 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37733 on to use that value in smaller contexts, this can easily force a
37734 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37735 supporting DImode, allow it. */
37736 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37737 return true;
37739 return false;
37742 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37743 tieable integer mode. */
37745 static bool
37746 ix86_tieable_integer_mode_p (enum machine_mode mode)
37748 switch (mode)
37750 case HImode:
37751 case SImode:
37752 return true;
37754 case QImode:
37755 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37757 case DImode:
37758 return TARGET_64BIT;
37760 default:
37761 return false;
37765 /* Return true if MODE1 is accessible in a register that can hold MODE2
37766 without copying. That is, all register classes that can hold MODE2
37767 can also hold MODE1. */
37769 bool
37770 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37772 if (mode1 == mode2)
37773 return true;
37775 if (ix86_tieable_integer_mode_p (mode1)
37776 && ix86_tieable_integer_mode_p (mode2))
37777 return true;
37779 /* MODE2 being XFmode implies fp stack or general regs, which means we
37780 can tie any smaller floating point modes to it. Note that we do not
37781 tie this with TFmode. */
37782 if (mode2 == XFmode)
37783 return mode1 == SFmode || mode1 == DFmode;
37785 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37786 that we can tie it with SFmode. */
37787 if (mode2 == DFmode)
37788 return mode1 == SFmode;
37790 /* If MODE2 is only appropriate for an SSE register, then tie with
37791 any other mode acceptable to SSE registers. */
37792 if (GET_MODE_SIZE (mode2) == 32
37793 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37794 return (GET_MODE_SIZE (mode1) == 32
37795 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37796 if (GET_MODE_SIZE (mode2) == 16
37797 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37798 return (GET_MODE_SIZE (mode1) == 16
37799 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37801 /* If MODE2 is appropriate for an MMX register, then tie
37802 with any other mode acceptable to MMX registers. */
37803 if (GET_MODE_SIZE (mode2) == 8
37804 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37805 return (GET_MODE_SIZE (mode1) == 8
37806 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37808 return false;
37811 /* Return the cost of moving between two registers of mode MODE. */
37813 static int
37814 ix86_set_reg_reg_cost (enum machine_mode mode)
37816 unsigned int units = UNITS_PER_WORD;
37818 switch (GET_MODE_CLASS (mode))
37820 default:
37821 break;
37823 case MODE_CC:
37824 units = GET_MODE_SIZE (CCmode);
37825 break;
37827 case MODE_FLOAT:
37828 if ((TARGET_SSE && mode == TFmode)
37829 || (TARGET_80387 && mode == XFmode)
37830 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37831 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37832 units = GET_MODE_SIZE (mode);
37833 break;
37835 case MODE_COMPLEX_FLOAT:
37836 if ((TARGET_SSE && mode == TCmode)
37837 || (TARGET_80387 && mode == XCmode)
37838 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37839 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37840 units = GET_MODE_SIZE (mode);
37841 break;
37843 case MODE_VECTOR_INT:
37844 case MODE_VECTOR_FLOAT:
37845 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37846 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37847 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37848 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37849 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37850 units = GET_MODE_SIZE (mode);
37853 /* Return the cost of moving between two registers of mode MODE,
37854 assuming that the move will be in pieces of at most UNITS bytes. */
37855 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37858 /* Compute a (partial) cost for rtx X. Return true if the complete
37859 cost has been computed, and false if subexpressions should be
37860 scanned. In either case, *TOTAL contains the cost result. */
37862 static bool
37863 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37864 bool speed)
37866 rtx mask;
37867 enum rtx_code code = (enum rtx_code) code_i;
37868 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37869 enum machine_mode mode = GET_MODE (x);
37870 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37872 switch (code)
37874 case SET:
37875 if (register_operand (SET_DEST (x), VOIDmode)
37876 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37878 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37879 return true;
37881 return false;
37883 case CONST_INT:
37884 case CONST:
37885 case LABEL_REF:
37886 case SYMBOL_REF:
37887 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37888 *total = 3;
37889 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37890 *total = 2;
37891 else if (flag_pic && SYMBOLIC_CONST (x)
37892 && (!TARGET_64BIT
37893 || (GET_CODE (x) != LABEL_REF
37894 && (GET_CODE (x) != SYMBOL_REF
37895 || !SYMBOL_REF_LOCAL_P (x)))))
37896 *total = 1;
37897 else
37898 *total = 0;
37899 return true;
37901 case CONST_DOUBLE:
37902 if (mode == VOIDmode)
37904 *total = 0;
37905 return true;
37907 switch (standard_80387_constant_p (x))
37909 case 1: /* 0.0 */
37910 *total = 1;
37911 return true;
37912 default: /* Other constants */
37913 *total = 2;
37914 return true;
37915 case 0:
37916 case -1:
37917 break;
37919 if (SSE_FLOAT_MODE_P (mode))
37921 case CONST_VECTOR:
37922 switch (standard_sse_constant_p (x))
37924 case 0:
37925 break;
37926 case 1: /* 0: xor eliminates false dependency */
37927 *total = 0;
37928 return true;
37929 default: /* -1: cmp contains false dependency */
37930 *total = 1;
37931 return true;
37934 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37935 it'll probably end up. Add a penalty for size. */
37936 *total = (COSTS_N_INSNS (1)
37937 + (flag_pic != 0 && !TARGET_64BIT)
37938 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37939 return true;
37941 case ZERO_EXTEND:
37942 /* The zero extension is often completely free on x86_64, so make
37943 it as cheap as possible. */
37944 if (TARGET_64BIT && mode == DImode
37945 && GET_MODE (XEXP (x, 0)) == SImode)
37946 *total = 1;
37947 else if (TARGET_ZERO_EXTEND_WITH_AND)
37948 *total = cost->add;
37949 else
37950 *total = cost->movzx;
37951 return false;
37953 case SIGN_EXTEND:
37954 *total = cost->movsx;
37955 return false;
37957 case ASHIFT:
37958 if (SCALAR_INT_MODE_P (mode)
37959 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37960 && CONST_INT_P (XEXP (x, 1)))
37962 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37963 if (value == 1)
37965 *total = cost->add;
37966 return false;
37968 if ((value == 2 || value == 3)
37969 && cost->lea <= cost->shift_const)
37971 *total = cost->lea;
37972 return false;
37975 /* FALLTHRU */
37977 case ROTATE:
37978 case ASHIFTRT:
37979 case LSHIFTRT:
37980 case ROTATERT:
37981 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
37983 /* ??? Should be SSE vector operation cost. */
37984 /* At least for published AMD latencies, this really is the same
37985 as the latency for a simple fpu operation like fabs. */
37986 /* V*QImode is emulated with 1-11 insns. */
37987 if (mode == V16QImode || mode == V32QImode)
37989 int count = 11;
37990 if (TARGET_XOP && mode == V16QImode)
37992 /* For XOP we use vpshab, which requires a broadcast of the
37993 value to the variable shift insn. For constants this
37994 means a V16QImode const in mem; even when we can perform the
37995 shift with one insn set the cost to prefer paddb. */
37996 if (CONSTANT_P (XEXP (x, 1)))
37998 *total = (cost->fabs
37999 + rtx_cost (XEXP (x, 0), code, 0, speed)
38000 + (speed ? 2 : COSTS_N_BYTES (16)));
38001 return true;
38003 count = 3;
38005 else if (TARGET_SSSE3)
38006 count = 7;
38007 *total = cost->fabs * count;
38009 else
38010 *total = cost->fabs;
38012 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38014 if (CONST_INT_P (XEXP (x, 1)))
38016 if (INTVAL (XEXP (x, 1)) > 32)
38017 *total = cost->shift_const + COSTS_N_INSNS (2);
38018 else
38019 *total = cost->shift_const * 2;
38021 else
38023 if (GET_CODE (XEXP (x, 1)) == AND)
38024 *total = cost->shift_var * 2;
38025 else
38026 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38029 else
38031 if (CONST_INT_P (XEXP (x, 1)))
38032 *total = cost->shift_const;
38033 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38034 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38036 /* Return the cost after shift-and truncation. */
38037 *total = cost->shift_var;
38038 return true;
38040 else
38041 *total = cost->shift_var;
38043 return false;
38045 case FMA:
38047 rtx sub;
38049 gcc_assert (FLOAT_MODE_P (mode));
38050 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38052 /* ??? SSE scalar/vector cost should be used here. */
38053 /* ??? Bald assumption that fma has the same cost as fmul. */
38054 *total = cost->fmul;
38055 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38057 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38058 sub = XEXP (x, 0);
38059 if (GET_CODE (sub) == NEG)
38060 sub = XEXP (sub, 0);
38061 *total += rtx_cost (sub, FMA, 0, speed);
38063 sub = XEXP (x, 2);
38064 if (GET_CODE (sub) == NEG)
38065 sub = XEXP (sub, 0);
38066 *total += rtx_cost (sub, FMA, 2, speed);
38067 return true;
38070 case MULT:
38071 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38073 /* ??? SSE scalar cost should be used here. */
38074 *total = cost->fmul;
38075 return false;
38077 else if (X87_FLOAT_MODE_P (mode))
38079 *total = cost->fmul;
38080 return false;
38082 else if (FLOAT_MODE_P (mode))
38084 /* ??? SSE vector cost should be used here. */
38085 *total = cost->fmul;
38086 return false;
38088 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38090 /* V*QImode is emulated with 7-13 insns. */
38091 if (mode == V16QImode || mode == V32QImode)
38093 int extra = 11;
38094 if (TARGET_XOP && mode == V16QImode)
38095 extra = 5;
38096 else if (TARGET_SSSE3)
38097 extra = 6;
38098 *total = cost->fmul * 2 + cost->fabs * extra;
38100 /* V*DImode is emulated with 5-8 insns. */
38101 else if (mode == V2DImode || mode == V4DImode)
38103 if (TARGET_XOP && mode == V2DImode)
38104 *total = cost->fmul * 2 + cost->fabs * 3;
38105 else
38106 *total = cost->fmul * 3 + cost->fabs * 5;
38108 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38109 insns, including two PMULUDQ. */
38110 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38111 *total = cost->fmul * 2 + cost->fabs * 5;
38112 else
38113 *total = cost->fmul;
38114 return false;
38116 else
38118 rtx op0 = XEXP (x, 0);
38119 rtx op1 = XEXP (x, 1);
38120 int nbits;
38121 if (CONST_INT_P (XEXP (x, 1)))
38123 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38124 for (nbits = 0; value != 0; value &= value - 1)
38125 nbits++;
38127 else
38128 /* This is arbitrary. */
38129 nbits = 7;
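/* Note that the loop above clears the lowest set bit of VALUE on each
iteration, so for a constant multiplier NBITS is its population count;
e.g. a multiply by 20 (binary 10100) gives nbits == 2.  */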
38131 /* Compute costs correctly for widening multiplication. */
38132 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38133 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38134 == GET_MODE_SIZE (mode))
38136 int is_mulwiden = 0;
38137 enum machine_mode inner_mode = GET_MODE (op0);
38139 if (GET_CODE (op0) == GET_CODE (op1))
38140 is_mulwiden = 1, op1 = XEXP (op1, 0);
38141 else if (CONST_INT_P (op1))
38143 if (GET_CODE (op0) == SIGN_EXTEND)
38144 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38145 == INTVAL (op1);
38146 else
38147 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38150 if (is_mulwiden)
38151 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38154 *total = (cost->mult_init[MODE_INDEX (mode)]
38155 + nbits * cost->mult_bit
38156 + rtx_cost (op0, outer_code, opno, speed)
38157 + rtx_cost (op1, outer_code, opno, speed));
38159 return true;
38162 case DIV:
38163 case UDIV:
38164 case MOD:
38165 case UMOD:
38166 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38167 /* ??? SSE cost should be used here. */
38168 *total = cost->fdiv;
38169 else if (X87_FLOAT_MODE_P (mode))
38170 *total = cost->fdiv;
38171 else if (FLOAT_MODE_P (mode))
38172 /* ??? SSE vector cost should be used here. */
38173 *total = cost->fdiv;
38174 else
38175 *total = cost->divide[MODE_INDEX (mode)];
38176 return false;
38178 case PLUS:
38179 if (GET_MODE_CLASS (mode) == MODE_INT
38180 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38182 if (GET_CODE (XEXP (x, 0)) == PLUS
38183 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38184 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38185 && CONSTANT_P (XEXP (x, 1)))
38187 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38188 if (val == 2 || val == 4 || val == 8)
38190 *total = cost->lea;
38191 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38192 outer_code, opno, speed);
38193 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38194 outer_code, opno, speed);
38195 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38196 return true;
38199 else if (GET_CODE (XEXP (x, 0)) == MULT
38200 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38202 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38203 if (val == 2 || val == 4 || val == 8)
38205 *total = cost->lea;
38206 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38207 outer_code, opno, speed);
38208 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38209 return true;
38212 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38214 *total = cost->lea;
38215 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38216 outer_code, opno, speed);
38217 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38218 outer_code, opno, speed);
38219 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38220 return true;
38223 /* FALLTHRU */
38225 case MINUS:
38226 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38228 /* ??? SSE cost should be used here. */
38229 *total = cost->fadd;
38230 return false;
38232 else if (X87_FLOAT_MODE_P (mode))
38234 *total = cost->fadd;
38235 return false;
38237 else if (FLOAT_MODE_P (mode))
38239 /* ??? SSE vector cost should be used here. */
38240 *total = cost->fadd;
38241 return false;
38243 /* FALLTHRU */
38245 case AND:
38246 case IOR:
38247 case XOR:
38248 if (GET_MODE_CLASS (mode) == MODE_INT
38249 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38251 *total = (cost->add * 2
38252 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38253 << (GET_MODE (XEXP (x, 0)) != DImode))
38254 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38255 << (GET_MODE (XEXP (x, 1)) != DImode)));
38256 return true;
38258 /* FALLTHRU */
38260 case NEG:
38261 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38263 /* ??? SSE cost should be used here. */
38264 *total = cost->fchs;
38265 return false;
38267 else if (X87_FLOAT_MODE_P (mode))
38269 *total = cost->fchs;
38270 return false;
38272 else if (FLOAT_MODE_P (mode))
38274 /* ??? SSE vector cost should be used here. */
38275 *total = cost->fchs;
38276 return false;
38278 /* FALLTHRU */
38280 case NOT:
38281 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38283 /* ??? Should be SSE vector operation cost. */
38284 /* At least for published AMD latencies, this really is the same
38285 as the latency for a simple fpu operation like fabs. */
38286 *total = cost->fabs;
38288 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38289 *total = cost->add * 2;
38290 else
38291 *total = cost->add;
38292 return false;
38294 case COMPARE:
38295 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38296 && XEXP (XEXP (x, 0), 1) == const1_rtx
38297 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38298 && XEXP (x, 1) == const0_rtx)
38300 /* This kind of construct is implemented using test[bwl].
38301 Treat it as if we had an AND. */
38302 *total = (cost->add
38303 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38304 + rtx_cost (const1_rtx, outer_code, opno, speed));
38305 return true;
38307 return false;
38309 case FLOAT_EXTEND:
38310 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38311 *total = 0;
38312 return false;
38314 case ABS:
38315 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38316 /* ??? SSE cost should be used here. */
38317 *total = cost->fabs;
38318 else if (X87_FLOAT_MODE_P (mode))
38319 *total = cost->fabs;
38320 else if (FLOAT_MODE_P (mode))
38321 /* ??? SSE vector cost should be used here. */
38322 *total = cost->fabs;
38323 return false;
38325 case SQRT:
38326 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38327 /* ??? SSE cost should be used here. */
38328 *total = cost->fsqrt;
38329 else if (X87_FLOAT_MODE_P (mode))
38330 *total = cost->fsqrt;
38331 else if (FLOAT_MODE_P (mode))
38332 /* ??? SSE vector cost should be used here. */
38333 *total = cost->fsqrt;
38334 return false;
38336 case UNSPEC:
38337 if (XINT (x, 1) == UNSPEC_TP)
38338 *total = 0;
38339 return false;
38341 case VEC_SELECT:
38342 case VEC_CONCAT:
38343 case VEC_DUPLICATE:
38344 /* ??? Assume all of these vector manipulation patterns are
38345 recognizable, in which case they all pretty much have the
38346 same cost. */
38347 *total = cost->fabs;
38348 return true;
38349 case VEC_MERGE:
38350 mask = XEXP (x, 2);
38351 /* This is a masked instruction; assume the same cost
38352 as the nonmasked variant. */
38353 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38354 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38355 else
38356 *total = cost->fabs;
38357 return true;
38359 default:
38360 return false;
38364 #if TARGET_MACHO
38366 static int current_machopic_label_num;
38368 /* Given a symbol name and its associated stub, write out the
38369 definition of the stub. */
38371 void
38372 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38374 unsigned int length;
38375 char *binder_name, *symbol_name, lazy_ptr_name[32];
38376 int label = ++current_machopic_label_num;
38378 /* For 64-bit we shouldn't get here. */
38379 gcc_assert (!TARGET_64BIT);
38381 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38382 symb = targetm.strip_name_encoding (symb);
38384 length = strlen (stub);
38385 binder_name = XALLOCAVEC (char, length + 32);
38386 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38388 length = strlen (symb);
38389 symbol_name = XALLOCAVEC (char, length + 32);
38390 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38392 sprintf (lazy_ptr_name, "L%d$lz", label);
38394 if (MACHOPIC_ATT_STUB)
38395 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38396 else if (MACHOPIC_PURE)
38397 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38398 else
38399 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38401 fprintf (file, "%s:\n", stub);
38402 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38404 if (MACHOPIC_ATT_STUB)
38406 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38408 else if (MACHOPIC_PURE)
38410 /* PIC stub. */
38411 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38412 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38413 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38414 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38415 label, lazy_ptr_name, label);
38416 fprintf (file, "\tjmp\t*%%ecx\n");
38418 else
38419 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38421 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38422 it needs no stub-binding-helper. */
38423 if (MACHOPIC_ATT_STUB)
38424 return;
38426 fprintf (file, "%s:\n", binder_name);
38428 if (MACHOPIC_PURE)
38430 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38431 fprintf (file, "\tpushl\t%%ecx\n");
38433 else
38434 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38436 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38438 /* N.B. Keep the correspondence of these
38439 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38440 old-pic/new-pic/non-pic stubs; altering this will break
38441 compatibility with existing dylibs. */
38442 if (MACHOPIC_PURE)
38444 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38445 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38447 else
38448 /* 16-byte -mdynamic-no-pic stub. */
38449 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38451 fprintf (file, "%s:\n", lazy_ptr_name);
38452 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38453 fprintf (file, ASM_LONG "%s\n", binder_name);
38455 #endif /* TARGET_MACHO */
38457 /* Order the registers for register allocator. */
38459 void
38460 x86_order_regs_for_local_alloc (void)
38462 int pos = 0;
38463 int i;
38465 /* First allocate the local general purpose registers. */
38466 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38467 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38468 reg_alloc_order [pos++] = i;
38470 /* Global general purpose registers. */
38471 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38472 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38473 reg_alloc_order [pos++] = i;
38475 /* x87 registers come first in case we are doing FP math
38476 using them. */
38477 if (!TARGET_SSE_MATH)
38478 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38479 reg_alloc_order [pos++] = i;
38481 /* SSE registers. */
38482 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38483 reg_alloc_order [pos++] = i;
38484 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38485 reg_alloc_order [pos++] = i;
38487 /* Extended REX SSE registers. */
38488 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38489 reg_alloc_order [pos++] = i;
38491 /* Mask register. */
38492 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38493 reg_alloc_order [pos++] = i;
38495 /* x87 registers. */
38496 if (TARGET_SSE_MATH)
38497 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38498 reg_alloc_order [pos++] = i;
38500 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38501 reg_alloc_order [pos++] = i;
38503 /* Initialize the rest of array as we do not allocate some registers
38504 at all. */
38505 while (pos < FIRST_PSEUDO_REGISTER)
38506 reg_alloc_order [pos++] = 0;
38509 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38510 in struct attribute_spec.handler. */
38511 static tree
38512 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38513 tree args,
38514 int flags ATTRIBUTE_UNUSED,
38515 bool *no_add_attrs)
38517 if (TREE_CODE (*node) != FUNCTION_TYPE
38518 && TREE_CODE (*node) != METHOD_TYPE
38519 && TREE_CODE (*node) != FIELD_DECL
38520 && TREE_CODE (*node) != TYPE_DECL)
38522 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38523 name);
38524 *no_add_attrs = true;
38525 return NULL_TREE;
38527 if (TARGET_64BIT)
38529 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38530 name);
38531 *no_add_attrs = true;
38532 return NULL_TREE;
38534 if (is_attribute_p ("callee_pop_aggregate_return", name))
38536 tree cst;
38538 cst = TREE_VALUE (args);
38539 if (TREE_CODE (cst) != INTEGER_CST)
38541 warning (OPT_Wattributes,
38542 "%qE attribute requires an integer constant argument",
38543 name);
38544 *no_add_attrs = true;
38546 else if (compare_tree_int (cst, 0) != 0
38547 && compare_tree_int (cst, 1) != 0)
38549 warning (OPT_Wattributes,
38550 "argument to %qE attribute is neither zero, nor one",
38551 name);
38552 *no_add_attrs = true;
38555 return NULL_TREE;
38558 return NULL_TREE;
38561 /* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
38562 struct attribute_spec.handler. */
38563 static tree
38564 ix86_handle_abi_attribute (tree *node, tree name,
38565 tree args ATTRIBUTE_UNUSED,
38566 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38568 if (TREE_CODE (*node) != FUNCTION_TYPE
38569 && TREE_CODE (*node) != METHOD_TYPE
38570 && TREE_CODE (*node) != FIELD_DECL
38571 && TREE_CODE (*node) != TYPE_DECL)
38573 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38574 name);
38575 *no_add_attrs = true;
38576 return NULL_TREE;
38579 /* Can combine regparm with all attributes but fastcall. */
38580 if (is_attribute_p ("ms_abi", name))
38582 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38584 error ("ms_abi and sysv_abi attributes are not compatible");
38587 return NULL_TREE;
38589 else if (is_attribute_p ("sysv_abi", name))
38591 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38593 error ("ms_abi and sysv_abi attributes are not compatible");
38596 return NULL_TREE;
38599 return NULL_TREE;
38602 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38603 struct attribute_spec.handler. */
38604 static tree
38605 ix86_handle_struct_attribute (tree *node, tree name,
38606 tree args ATTRIBUTE_UNUSED,
38607 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38609 tree *type = NULL;
38610 if (DECL_P (*node))
38612 if (TREE_CODE (*node) == TYPE_DECL)
38613 type = &TREE_TYPE (*node);
38615 else
38616 type = node;
38618 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38620 warning (OPT_Wattributes, "%qE attribute ignored",
38621 name);
38622 *no_add_attrs = true;
38625 else if ((is_attribute_p ("ms_struct", name)
38626 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38627 || ((is_attribute_p ("gcc_struct", name)
38628 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38630 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38631 name);
38632 *no_add_attrs = true;
38635 return NULL_TREE;
38638 static tree
38639 ix86_handle_fndecl_attribute (tree *node, tree name,
38640 tree args ATTRIBUTE_UNUSED,
38641 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38643 if (TREE_CODE (*node) != FUNCTION_DECL)
38645 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38646 name);
38647 *no_add_attrs = true;
38649 return NULL_TREE;
38652 static bool
38653 ix86_ms_bitfield_layout_p (const_tree record_type)
38655 return ((TARGET_MS_BITFIELD_LAYOUT
38656 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38657 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38660 /* Returns an expression indicating where the this parameter is
38661 located on entry to the FUNCTION. */
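/* In the 64-bit ABIs this is simply the first integer argument register
(%rdi for the SysV ABI, %rcx for the MS ABI), or the second one (%rsi or
%rdx) when a hidden aggregate-return pointer occupies the first slot.  */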
38663 static rtx
38664 x86_this_parameter (tree function)
38666 tree type = TREE_TYPE (function);
38667 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38668 int nregs;
38670 if (TARGET_64BIT)
38672 const int *parm_regs;
38674 if (ix86_function_type_abi (type) == MS_ABI)
38675 parm_regs = x86_64_ms_abi_int_parameter_registers;
38676 else
38677 parm_regs = x86_64_int_parameter_registers;
38678 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38681 nregs = ix86_function_regparm (type, function);
38683 if (nregs > 0 && !stdarg_p (type))
38685 int regno;
38686 unsigned int ccvt = ix86_get_callcvt (type);
38688 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38689 regno = aggr ? DX_REG : CX_REG;
38690 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38692 regno = CX_REG;
38693 if (aggr)
38694 return gen_rtx_MEM (SImode,
38695 plus_constant (Pmode, stack_pointer_rtx, 4));
38697 else
38699 regno = AX_REG;
38700 if (aggr)
38702 regno = DX_REG;
38703 if (nregs == 1)
38704 return gen_rtx_MEM (SImode,
38705 plus_constant (Pmode,
38706 stack_pointer_rtx, 4));
38709 return gen_rtx_REG (SImode, regno);
38712 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38713 aggr ? 8 : 4));
38716 /* Determine whether x86_output_mi_thunk can succeed. */
38718 static bool
38719 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
38720 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
38721 HOST_WIDE_INT vcall_offset, const_tree function)
38723 /* 64-bit can handle anything. */
38724 if (TARGET_64BIT)
38725 return true;
38727 /* For 32-bit, everything's fine if we have one free register. */
38728 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38729 return true;
38731 /* Need a free register for vcall_offset. */
38732 if (vcall_offset)
38733 return false;
38735 /* Need a free register for GOT references. */
38736 if (flag_pic && !targetm.binds_local_p (function))
38737 return false;
38739 /* Otherwise ok. */
38740 return true;
38743 /* Output the assembler code for a thunk function. THUNK_DECL is the
38744 declaration for the thunk function itself, FUNCTION is the decl for
38745 the target function. DELTA is an immediate constant offset to be
38746 added to THIS. If VCALL_OFFSET is nonzero, the word at
38747 *(*this + vcall_offset) should be added to THIS. */
38749 static void
38750 x86_output_mi_thunk (FILE *file,
38751 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
38752 HOST_WIDE_INT vcall_offset, tree function)
38754 rtx this_param = x86_this_parameter (function);
38755 rtx this_reg, tmp, fnaddr;
38756 unsigned int tmp_regno;
38758 if (TARGET_64BIT)
38759 tmp_regno = R10_REG;
38760 else
38762 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38763 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38764 tmp_regno = AX_REG;
38765 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38766 tmp_regno = DX_REG;
38767 else
38768 tmp_regno = CX_REG;
38771 emit_note (NOTE_INSN_PROLOGUE_END);
38773 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38774 pull it in now and let DELTA benefit. */
38775 if (REG_P (this_param))
38776 this_reg = this_param;
38777 else if (vcall_offset)
38779 /* Put the this parameter into %eax. */
38780 this_reg = gen_rtx_REG (Pmode, AX_REG);
38781 emit_move_insn (this_reg, this_param);
38783 else
38784 this_reg = NULL_RTX;
38786 /* Adjust the this parameter by a fixed constant. */
38787 if (delta)
38789 rtx delta_rtx = GEN_INT (delta);
38790 rtx delta_dst = this_reg ? this_reg : this_param;
38792 if (TARGET_64BIT)
38794 if (!x86_64_general_operand (delta_rtx, Pmode))
38796 tmp = gen_rtx_REG (Pmode, tmp_regno);
38797 emit_move_insn (tmp, delta_rtx);
38798 delta_rtx = tmp;
38802 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38805 /* Adjust the this parameter by a value stored in the vtable. */
38806 if (vcall_offset)
38808 rtx vcall_addr, vcall_mem, this_mem;
38810 tmp = gen_rtx_REG (Pmode, tmp_regno);
38812 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38813 if (Pmode != ptr_mode)
38814 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38815 emit_move_insn (tmp, this_mem);
38817 /* Adjust the this parameter. */
38818 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38819 if (TARGET_64BIT
38820 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38822 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38823 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38824 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38827 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38828 if (Pmode != ptr_mode)
38829 emit_insn (gen_addsi_1_zext (this_reg,
38830 gen_rtx_REG (ptr_mode,
38831 REGNO (this_reg)),
38832 vcall_mem));
38833 else
38834 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38837 /* If necessary, drop THIS back to its stack slot. */
38838 if (this_reg && this_reg != this_param)
38839 emit_move_insn (this_param, this_reg);
38841 fnaddr = XEXP (DECL_RTL (function), 0);
38842 if (TARGET_64BIT)
38844 if (!flag_pic || targetm.binds_local_p (function)
38845 || TARGET_PECOFF)
38847 else
38849 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38850 tmp = gen_rtx_CONST (Pmode, tmp);
38851 fnaddr = gen_rtx_MEM (Pmode, tmp);
38854 else
38856 if (!flag_pic || targetm.binds_local_p (function))
38858 #if TARGET_MACHO
38859 else if (TARGET_MACHO)
38861 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38862 fnaddr = XEXP (fnaddr, 0);
38864 #endif /* TARGET_MACHO */
38865 else
38867 tmp = gen_rtx_REG (Pmode, CX_REG);
38868 output_set_got (tmp, NULL_RTX);
38870 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38871 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
38872 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
38876 /* Our sibling call patterns do not allow memories, because we have no
38877 predicate that can distinguish between frame and non-frame memory.
38878 For our purposes here, we can get away with (ab)using a jump pattern,
38879 because we're going to do no optimization. */
38880 if (MEM_P (fnaddr))
38881 emit_jump_insn (gen_indirect_jump (fnaddr));
38882 else
38884 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38885 fnaddr = legitimize_pic_address (fnaddr,
38886 gen_rtx_REG (Pmode, tmp_regno));
38888 if (!sibcall_insn_operand (fnaddr, word_mode))
38890 tmp = gen_rtx_REG (word_mode, tmp_regno);
38891 if (GET_MODE (fnaddr) != word_mode)
38892 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38893 emit_move_insn (tmp, fnaddr);
38894 fnaddr = tmp;
38897 tmp = gen_rtx_MEM (QImode, fnaddr);
38898 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38899 tmp = emit_call_insn (tmp);
38900 SIBLING_CALL_P (tmp) = 1;
38902 emit_barrier ();
38904 /* Emit just enough of rest_of_compilation to get the insns emitted.
38905 Note that use_thunk calls assemble_start_function et al. */
38906 tmp = get_insns ();
38907 shorten_branches (tmp);
38908 final_start_function (tmp, file, 1);
38909 final (tmp, file, 1);
38910 final_end_function ();
38913 static void
38914 x86_file_start (void)
38916 default_file_start ();
38917 if (TARGET_16BIT)
38918 fputs ("\t.code16gcc\n", asm_out_file);
38919 #if TARGET_MACHO
38920 darwin_file_start ();
38921 #endif
38922 if (X86_FILE_START_VERSION_DIRECTIVE)
38923 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38924 if (X86_FILE_START_FLTUSED)
38925 fputs ("\t.global\t__fltused\n", asm_out_file);
38926 if (ix86_asm_dialect == ASM_INTEL)
38927 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
38931 x86_field_alignment (tree field, int computed)
38933 enum machine_mode mode;
38934 tree type = TREE_TYPE (field);
38936 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38937 return computed;
38938 mode = TYPE_MODE (strip_array_types (type));
38939 if (mode == DFmode || mode == DCmode
38940 || GET_MODE_CLASS (mode) == MODE_INT
38941 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38942 return MIN (32, computed);
38943 return computed;
38946 /* Output assembler code to FILE to increment profiler label # LABELNO
38947 for profiling a function entry. */
38948 void
38949 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38951 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38952 : MCOUNT_NAME);
38954 if (TARGET_64BIT)
38956 #ifndef NO_PROFILE_COUNTERS
38957 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38958 #endif
38960 if (!TARGET_PECOFF && flag_pic)
38961 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38962 else
38963 fprintf (file, "\tcall\t%s\n", mcount_name);
38965 else if (flag_pic)
38967 #ifndef NO_PROFILE_COUNTERS
38968 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
38969 LPREFIX, labelno);
38970 #endif
38971 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
38973 else
38975 #ifndef NO_PROFILE_COUNTERS
38976 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
38977 LPREFIX, labelno);
38978 #endif
38979 fprintf (file, "\tcall\t%s\n", mcount_name);
38983 /* We don't have exact information about the insn sizes, but we may assume
38984 quite safely that we are informed about all 1 byte insns and memory
38985 address sizes. This is enough to eliminate unnecessary padding in
38986 99% of cases. */
38988 static int
38989 min_insn_size (rtx insn)
38991 int l = 0, len;
38993 if (!INSN_P (insn) || !active_insn_p (insn))
38994 return 0;
38996 /* Discard alignments we've emitted and jump instructions. */
38997 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
38998 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
38999 return 0;
39001 /* Important case - calls are always 5 bytes.
39002 It is common to have many calls in a row. */
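/* A direct call to a symbol is encoded as opcode E8 followed by a 32-bit
relative displacement, hence exactly 5 bytes.  */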
39003 if (CALL_P (insn)
39004 && symbolic_reference_mentioned_p (PATTERN (insn))
39005 && !SIBLING_CALL_P (insn))
39006 return 5;
39007 len = get_attr_length (insn);
39008 if (len <= 1)
39009 return 1;
39011 /* For normal instructions we rely on get_attr_length being exact,
39012 with a few exceptions. */
39013 if (!JUMP_P (insn))
39015 enum attr_type type = get_attr_type (insn);
39017 switch (type)
39019 case TYPE_MULTI:
39020 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39021 || asm_noperands (PATTERN (insn)) >= 0)
39022 return 0;
39023 break;
39024 case TYPE_OTHER:
39025 case TYPE_FCMP:
39026 break;
39027 default:
39028 /* Otherwise trust get_attr_length. */
39029 return len;
39032 l = get_attr_length_address (insn);
39033 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39034 l = 4;
39036 if (l)
39037 return 1+l;
39038 else
39039 return 2;
39042 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39044 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
39045 window. */
39047 static void
39048 ix86_avoid_jump_mispredicts (void)
39050 rtx insn, start = get_insns ();
39051 int nbytes = 0, njumps = 0;
39052 int isjump = 0;
39054 /* Look for all minimal intervals of instructions containing 4 jumps.
39055 The intervals are bounded by START and INSN.  NBYTES is the total
39056 size of instructions in the interval including INSN and not including
39057 START.  When NBYTES is smaller than 16 bytes, it is possible
39058 that the end of START and INSN ends up in the same 16-byte page.
39060 The smallest offset in the page at which INSN can start is the case
39061 where START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
39062 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
39064 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
39065 have to, control transfer to its label(s) can be performed through other
39066 means, and we also estimate the minimum length of all asm stmts as 0. */
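/* Put differently: whenever four branches (jumps or calls) could fall into
one 16-byte fetch window, padding is emitted before the last of them so
that at most three share the window.  */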
39067 for (insn = start; insn; insn = NEXT_INSN (insn))
39069 int min_size;
39071 if (LABEL_P (insn))
39073 int align = label_to_alignment (insn);
39074 int max_skip = label_to_max_skip (insn);
39076 if (max_skip > 15)
39077 max_skip = 15;
39078 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39079 already in the current 16 byte page, because otherwise
39080 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39081 bytes to reach 16 byte boundary. */
39082 if (align <= 0
39083 || (align <= 3 && max_skip != (1 << align) - 1))
39084 max_skip = 0;
39085 if (dump_file)
39086 fprintf (dump_file, "Label %i with max_skip %i\n",
39087 INSN_UID (insn), max_skip);
39088 if (max_skip)
39090 while (nbytes + max_skip >= 16)
39092 start = NEXT_INSN (start);
39093 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39094 || CALL_P (start))
39095 njumps--, isjump = 1;
39096 else
39097 isjump = 0;
39098 nbytes -= min_insn_size (start);
39101 continue;
39104 min_size = min_insn_size (insn);
39105 nbytes += min_size;
39106 if (dump_file)
39107 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39108 INSN_UID (insn), min_size);
39109 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39110 || CALL_P (insn))
39111 njumps++;
39112 else
39113 continue;
39115 while (njumps > 3)
39117 start = NEXT_INSN (start);
39118 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39119 || CALL_P (start))
39120 njumps--, isjump = 1;
39121 else
39122 isjump = 0;
39123 nbytes -= min_insn_size (start);
39125 gcc_assert (njumps >= 0);
39126 if (dump_file)
39127 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39128 INSN_UID (start), INSN_UID (insn), nbytes);
39130 if (njumps == 3 && isjump && nbytes < 16)
39132 int padsize = 15 - nbytes + min_insn_size (insn);
39134 if (dump_file)
39135 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39136 INSN_UID (insn), padsize);
39137 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39141 #endif
39143 /* AMD Athlon works faster
39144 when RET is not the destination of a conditional jump or directly preceded
39145 by another jump instruction. We avoid the penalty by inserting a NOP just
39146 before the RET instruction in such cases. */
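/* The replacement emitted below, gen_simple_return_internal_long, is
presumably the well-known two-byte `rep ret' idiom used to sidestep this
predictor penalty.  */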
39147 static void
39148 ix86_pad_returns (void)
39150 edge e;
39151 edge_iterator ei;
39153 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39155 basic_block bb = e->src;
39156 rtx ret = BB_END (bb);
39157 rtx prev;
39158 bool replace = false;
39160 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39161 || optimize_bb_for_size_p (bb))
39162 continue;
39163 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39164 if (active_insn_p (prev) || LABEL_P (prev))
39165 break;
39166 if (prev && LABEL_P (prev))
39168 edge e;
39169 edge_iterator ei;
39171 FOR_EACH_EDGE (e, ei, bb->preds)
39172 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39173 && !(e->flags & EDGE_FALLTHRU))
39175 replace = true;
39176 break;
39179 if (!replace)
39181 prev = prev_active_insn (ret);
39182 if (prev
39183 && ((JUMP_P (prev) && any_condjump_p (prev))
39184 || CALL_P (prev)))
39185 replace = true;
39186 /* Empty functions get a branch mispredict even when
39187 the jump destination is not visible to us. */
39188 if (!prev && !optimize_function_for_size_p (cfun))
39189 replace = true;
39191 if (replace)
39193 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39194 delete_insn (ret);
39199 /* Count the minimum number of instructions in BB. Return 4 if the
39200 number of instructions >= 4. */
39202 static int
39203 ix86_count_insn_bb (basic_block bb)
39205 rtx insn;
39206 int insn_count = 0;
39208 /* Count number of instructions in this block. Return 4 if the number
39209 of instructions >= 4. */
39210 FOR_BB_INSNS (bb, insn)
39212 /* This only happens in exit blocks. */
39213 if (JUMP_P (insn)
39214 && ANY_RETURN_P (PATTERN (insn)))
39215 break;
39217 if (NONDEBUG_INSN_P (insn)
39218 && GET_CODE (PATTERN (insn)) != USE
39219 && GET_CODE (PATTERN (insn)) != CLOBBER)
39221 insn_count++;
39222 if (insn_count >= 4)
39223 return insn_count;
39227 return insn_count;
39231 /* Count the minimum number of instructions in code path in BB.
39232 Return 4 if the number of instructions >= 4. */
39234 static int
39235 ix86_count_insn (basic_block bb)
39237 edge e;
39238 edge_iterator ei;
39239 int min_prev_count;
39241 /* Only bother counting instructions along paths with no
39242 more than 2 basic blocks between entry and exit. Given
39243 that BB has an edge to exit, determine if a predecessor
39244 of BB has an edge from entry. If so, compute the number
39245 of instructions in the predecessor block. If there
39246 happen to be multiple such blocks, compute the minimum. */
39247 min_prev_count = 4;
39248 FOR_EACH_EDGE (e, ei, bb->preds)
39250 edge prev_e;
39251 edge_iterator prev_ei;
39253 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39255 min_prev_count = 0;
39256 break;
39258 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39260 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39262 int count = ix86_count_insn_bb (e->src);
39263 if (count < min_prev_count)
39264 min_prev_count = count;
39265 break;
39270 if (min_prev_count < 4)
39271 min_prev_count += ix86_count_insn_bb (bb);
39273 return min_prev_count;
39276 /* Pad short function to 4 instructions. */
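/* This pass is invoked from ix86_reorg only when TARGET_PAD_SHORT_FUNCTION
is set (e.g. when tuning for Atom), where very short functions are
reportedly penalized; NOPs are inserted before the epilogue so that every
path through the function executes at least four instructions.  */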
39278 static void
39279 ix86_pad_short_function (void)
39281 edge e;
39282 edge_iterator ei;
39284 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39286 rtx ret = BB_END (e->src);
39287 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39289 int insn_count = ix86_count_insn (e->src);
39291 /* Pad short function. */
39292 if (insn_count < 4)
39294 rtx insn = ret;
39296 /* Find epilogue. */
39297 while (insn
39298 && (!NOTE_P (insn)
39299 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39300 insn = PREV_INSN (insn);
39302 if (!insn)
39303 insn = ret;
39305 /* Two NOPs count as one instruction. */
39306 insn_count = 2 * (4 - insn_count);
39307 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39313 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39314 the epilogue, the Windows system unwinder will apply epilogue logic and
39315 produce incorrect offsets. This can be avoided by adding a nop between
39316 the last insn that can throw and the first insn of the epilogue. */
39318 static void
39319 ix86_seh_fixup_eh_fallthru (void)
39321 edge e;
39322 edge_iterator ei;
39324 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39326 rtx insn, next;
39328 /* Find the beginning of the epilogue. */
39329 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39330 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39331 break;
39332 if (insn == NULL)
39333 continue;
39335 /* We only care about preceding insns that can throw. */
39336 insn = prev_active_insn (insn);
39337 if (insn == NULL || !can_throw_internal (insn))
39338 continue;
39340 /* Do not separate calls from their debug information. */
39341 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39342 if (NOTE_P (next)
39343 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39344 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39345 insn = next;
39346 else
39347 break;
39349 emit_insn_after (gen_nops (const1_rtx), insn);
39353 /* Implement machine specific optimizations. We implement padding of returns
39354 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
39355 static void
39356 ix86_reorg (void)
39358 /* We are freeing block_for_insn in the toplev to keep compatibility
39359 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39360 compute_bb_for_insn ();
39362 if (TARGET_SEH && current_function_has_exception_handlers ())
39363 ix86_seh_fixup_eh_fallthru ();
39365 if (optimize && optimize_function_for_speed_p (cfun))
39367 if (TARGET_PAD_SHORT_FUNCTION)
39368 ix86_pad_short_function ();
39369 else if (TARGET_PAD_RETURNS)
39370 ix86_pad_returns ();
39371 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39372 if (TARGET_FOUR_JUMP_LIMIT)
39373 ix86_avoid_jump_mispredicts ();
39374 #endif
39378 /* Return nonzero when a QImode register that must be represented via a REX
39379 prefix is used. */
39380 bool
39381 x86_extended_QIreg_mentioned_p (rtx insn)
39383 int i;
39384 extract_insn_cached (insn);
39385 for (i = 0; i < recog_data.n_operands; i++)
39386 if (GENERAL_REG_P (recog_data.operand[i])
39387 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39388 return true;
39389 return false;
39392 /* Return nonzero when P points to a register encoded via a REX prefix.
39393 Called via for_each_rtx. */
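/* I.e. %r8-%r15 and %xmm8-%xmm15; the EVEX-only %xmm16-%xmm31 registers are
not included here.  */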
39394 static int
39395 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
39397 unsigned int regno;
39398 if (!REG_P (*p))
39399 return 0;
39400 regno = REGNO (*p);
39401 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39404 /* Return true when INSN mentions a register that must be encoded using a REX
39405 prefix. */
39406 bool
39407 x86_extended_reg_mentioned_p (rtx insn)
39409 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39410 extended_reg_mentioned_1, NULL);
39413 /* If profitable, negate (without causing overflow) integer constant
39414 of mode MODE at location LOC. Return true in this case. */
39415 bool
39416 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39418 HOST_WIDE_INT val;
39420 if (!CONST_INT_P (*loc))
39421 return false;
39423 switch (mode)
39425 case DImode:
39426 /* DImode x86_64 constants must fit in 32 bits. */
39427 gcc_assert (x86_64_immediate_operand (*loc, mode));
39429 mode = SImode;
39430 break;
39432 case SImode:
39433 case HImode:
39434 case QImode:
39435 break;
39437 default:
39438 gcc_unreachable ();
39441 /* Avoid overflows. */
39442 if (mode_signbit_p (mode, *loc))
39443 return false;
39445 val = INTVAL (*loc);
39447 /* Make things pretty: use `subl $4,%eax' rather than `addl $-4,%eax'.
39448 Exception: -128 encodes smaller than 128, so negate 128 but keep -128. */
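/* The asymmetry comes from the imm8 encoding, which covers -128..127:
`add $-128' fits in a sign-extended byte while `sub $128' would need a
32-bit immediate, and conversely `sub $-128' is shorter than `add $128'.  */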
39449 if ((val < 0 && val != -128)
39450 || val == 128)
39452 *loc = GEN_INT (-val);
39453 return true;
39456 return false;
39459 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39460 optabs would emit if we didn't have TFmode patterns. */
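/* When the input has its sign bit set a plain signed conversion cannot be
used, so the code halves the value (folding the dropped low bit back in
with an IOR so that rounding is unaffected), converts the now-nonnegative
half, and doubles the result with f0 + f0.  */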
39462 void
39463 x86_emit_floatuns (rtx operands[2])
39465 rtx neglab, donelab, i0, i1, f0, in, out;
39466 enum machine_mode mode, inmode;
39468 inmode = GET_MODE (operands[1]);
39469 gcc_assert (inmode == SImode || inmode == DImode);
39471 out = operands[0];
39472 in = force_reg (inmode, operands[1]);
39473 mode = GET_MODE (out);
39474 neglab = gen_label_rtx ();
39475 donelab = gen_label_rtx ();
39476 f0 = gen_reg_rtx (mode);
39478 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39480 expand_float (out, in, 0);
39482 emit_jump_insn (gen_jump (donelab));
39483 emit_barrier ();
39485 emit_label (neglab);
39487 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39488 1, OPTAB_DIRECT);
39489 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39490 1, OPTAB_DIRECT);
39491 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39493 expand_float (f0, i0, 0);
39495 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39497 emit_label (donelab);
39500 /* AVX512F does support 64-byte integer vector operations,
39501 thus the longest vector we are faced with is V64QImode. */
39502 #define MAX_VECT_LEN 64
39504 struct expand_vec_perm_d
39506 rtx target, op0, op1;
39507 unsigned char perm[MAX_VECT_LEN];
39508 enum machine_mode vmode;
39509 unsigned char nelt;
39510 bool one_operand_p;
39511 bool testing_p;
39514 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39515 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39516 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39518 /* Get a vector mode of the same size as the original but with elements
39519 twice as wide. This is only guaranteed to apply to integral vectors. */
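/* For example, V8HImode (eight 16-bit lanes) maps to V4SImode (four 32-bit
lanes): the same overall size, half as many elements, each twice as wide.  */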
39521 static inline enum machine_mode
39522 get_mode_wider_vector (enum machine_mode o)
39524 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39525 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39526 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39527 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39528 return n;
39531 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39532 fill target with val via vec_duplicate. */
39534 static bool
39535 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39537 bool ok;
39538 rtx insn, dup;
39540 /* First attempt to recognize VAL as-is. */
39541 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39542 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39543 if (recog_memoized (insn) < 0)
39545 rtx seq;
39546 /* If that fails, force VAL into a register. */
39548 start_sequence ();
39549 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39550 seq = get_insns ();
39551 end_sequence ();
39552 if (seq)
39553 emit_insn_before (seq, insn);
39555 ok = recog_memoized (insn) >= 0;
39556 gcc_assert (ok);
39558 return true;
39561 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39562 with all elements equal to VAR. Return true if successful. */
39564 static bool
39565 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39566 rtx target, rtx val)
39568 bool ok;
39570 switch (mode)
39572 case V2SImode:
39573 case V2SFmode:
39574 if (!mmx_ok)
39575 return false;
39576 /* FALLTHRU */
39578 case V4DFmode:
39579 case V4DImode:
39580 case V8SFmode:
39581 case V8SImode:
39582 case V2DFmode:
39583 case V2DImode:
39584 case V4SFmode:
39585 case V4SImode:
39586 case V16SImode:
39587 case V8DImode:
39588 case V16SFmode:
39589 case V8DFmode:
39590 return ix86_vector_duplicate_value (mode, target, val);
39592 case V4HImode:
39593 if (!mmx_ok)
39594 return false;
39595 if (TARGET_SSE || TARGET_3DNOW_A)
39597 rtx x;
39599 val = gen_lowpart (SImode, val);
39600 x = gen_rtx_TRUNCATE (HImode, val);
39601 x = gen_rtx_VEC_DUPLICATE (mode, x);
39602 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39603 return true;
39605 goto widen;
39607 case V8QImode:
39608 if (!mmx_ok)
39609 return false;
39610 goto widen;
39612 case V8HImode:
39613 if (TARGET_SSE2)
39615 struct expand_vec_perm_d dperm;
39616 rtx tmp1, tmp2;
39618 permute:
39619 memset (&dperm, 0, sizeof (dperm));
39620 dperm.target = target;
39621 dperm.vmode = mode;
39622 dperm.nelt = GET_MODE_NUNITS (mode);
39623 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39624 dperm.one_operand_p = true;
39626 /* Extend to SImode using a paradoxical SUBREG. */
39627 tmp1 = gen_reg_rtx (SImode);
39628 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39630 /* Insert the SImode value as low element of a V4SImode vector. */
39631 tmp2 = gen_reg_rtx (V4SImode);
39632 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39633 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39635 ok = (expand_vec_perm_1 (&dperm)
39636 || expand_vec_perm_broadcast_1 (&dperm));
39637 gcc_assert (ok);
39638 return ok;
39640 goto widen;
39642 case V16QImode:
39643 if (TARGET_SSE2)
39644 goto permute;
39645 goto widen;
39647 widen:
39648 /* Replicate the value once into the next wider mode and recurse. */
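/* E.g. a V8QImode broadcast of byte B first forms the HImode value
(B << 8) | B and then recurses as a V4HImode broadcast of that value.  */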
39650 enum machine_mode smode, wsmode, wvmode;
39651 rtx x;
39653 smode = GET_MODE_INNER (mode);
39654 wvmode = get_mode_wider_vector (mode);
39655 wsmode = GET_MODE_INNER (wvmode);
39657 val = convert_modes (wsmode, smode, val, true);
39658 x = expand_simple_binop (wsmode, ASHIFT, val,
39659 GEN_INT (GET_MODE_BITSIZE (smode)),
39660 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39661 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39663 x = gen_reg_rtx (wvmode);
39664 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39665 gcc_assert (ok);
39666 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39667 return ok;
39670 case V16HImode:
39671 case V32QImode:
39673 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39674 rtx x = gen_reg_rtx (hvmode);
39676 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39677 gcc_assert (ok);
39679 x = gen_rtx_VEC_CONCAT (mode, x, x);
39680 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39682 return true;
39684 default:
39685 return false;
39689 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39690 whose ONE_VAR element is VAR, and other elements are zero. Return true
39691 if successful. */
39693 static bool
39694 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39695 rtx target, rtx var, int one_var)
39697 enum machine_mode vsimode;
39698 rtx new_target;
39699 rtx x, tmp;
39700 bool use_vector_set = false;
39702 switch (mode)
39704 case V2DImode:
39705 /* For SSE4.1, we normally use vector set. But if the second
39706 element is zero and inter-unit moves are OK, we use movq
39707 instead. */
39708 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39709 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39710 && one_var == 0));
39711 break;
39712 case V16QImode:
39713 case V4SImode:
39714 case V4SFmode:
39715 use_vector_set = TARGET_SSE4_1;
39716 break;
39717 case V8HImode:
39718 use_vector_set = TARGET_SSE2;
39719 break;
39720 case V4HImode:
39721 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39722 break;
39723 case V32QImode:
39724 case V16HImode:
39725 case V8SImode:
39726 case V8SFmode:
39727 case V4DFmode:
39728 use_vector_set = TARGET_AVX;
39729 break;
39730 case V4DImode:
39731 /* Use ix86_expand_vector_set in 64bit mode only. */
39732 use_vector_set = TARGET_AVX && TARGET_64BIT;
39733 break;
39734 default:
39735 break;
39738 if (use_vector_set)
39740 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39741 var = force_reg (GET_MODE_INNER (mode), var);
39742 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39743 return true;
39746 switch (mode)
39748 case V2SFmode:
39749 case V2SImode:
39750 if (!mmx_ok)
39751 return false;
39752 /* FALLTHRU */
39754 case V2DFmode:
39755 case V2DImode:
39756 if (one_var != 0)
39757 return false;
39758 var = force_reg (GET_MODE_INNER (mode), var);
39759 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39760 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39761 return true;
39763 case V4SFmode:
39764 case V4SImode:
39765 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39766 new_target = gen_reg_rtx (mode);
39767 else
39768 new_target = target;
39769 var = force_reg (GET_MODE_INNER (mode), var);
39770 x = gen_rtx_VEC_DUPLICATE (mode, var);
39771 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39772 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39773 if (one_var != 0)
39775 /* We need to shuffle the value to the correct position, so
39776 create a new pseudo to store the intermediate result. */
39778 /* With SSE2, we can use the integer shuffle insns. */
39779 if (mode != V4SFmode && TARGET_SSE2)
39781 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39782 const1_rtx,
39783 GEN_INT (one_var == 1 ? 0 : 1),
39784 GEN_INT (one_var == 2 ? 0 : 1),
39785 GEN_INT (one_var == 3 ? 0 : 1)));
39786 if (target != new_target)
39787 emit_move_insn (target, new_target);
39788 return true;
39791 /* Otherwise convert the intermediate result to V4SFmode and
39792 use the SSE1 shuffle instructions. */
39793 if (mode != V4SFmode)
39795 tmp = gen_reg_rtx (V4SFmode);
39796 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39798 else
39799 tmp = new_target;
39801 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39802 const1_rtx,
39803 GEN_INT (one_var == 1 ? 0 : 1),
39804 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39805 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39807 if (mode != V4SFmode)
39808 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39809 else if (tmp != target)
39810 emit_move_insn (target, tmp);
39812 else if (target != new_target)
39813 emit_move_insn (target, new_target);
39814 return true;
39816 case V8HImode:
39817 case V16QImode:
39818 vsimode = V4SImode;
39819 goto widen;
39820 case V4HImode:
39821 case V8QImode:
39822 if (!mmx_ok)
39823 return false;
39824 vsimode = V2SImode;
39825 goto widen;
39826 widen:
39827 if (one_var != 0)
39828 return false;
39830 /* Zero extend the variable element to SImode and recurse. */
39831 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39833 x = gen_reg_rtx (vsimode);
39834 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39835 var, one_var))
39836 gcc_unreachable ();
39838 emit_move_insn (target, gen_lowpart (mode, x));
39839 return true;
39841 default:
39842 return false;
39846 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39847 consisting of the values in VALS. It is known that all elements
39848 except ONE_VAR are constants. Return true if successful. */
39850 static bool
39851 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39852 rtx target, rtx vals, int one_var)
39854 rtx var = XVECEXP (vals, 0, one_var);
39855 enum machine_mode wmode;
39856 rtx const_vec, x;
39858 const_vec = copy_rtx (vals);
39859 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39860 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39862 switch (mode)
39864 case V2DFmode:
39865 case V2DImode:
39866 case V2SFmode:
39867 case V2SImode:
39868 /* For the two element vectors, it's just as easy to use
39869 the general case. */
39870 return false;
39872 case V4DImode:
39873 /* Use ix86_expand_vector_set in 64bit mode only. */
39874 if (!TARGET_64BIT)
39875 return false;
39876 case V4DFmode:
39877 case V8SFmode:
39878 case V8SImode:
39879 case V16HImode:
39880 case V32QImode:
39881 case V4SFmode:
39882 case V4SImode:
39883 case V8HImode:
39884 case V4HImode:
39885 break;
39887 case V16QImode:
39888 if (TARGET_SSE4_1)
39889 break;
39890 wmode = V8HImode;
39891 goto widen;
39892 case V8QImode:
39893 wmode = V4HImode;
39894 goto widen;
39895 widen:
39896 /* There's no way to set one QImode entry easily. Combine
39897 the variable value with its adjacent constant value, and
39898 promote to an HImode set. */
39899 x = XVECEXP (vals, 0, one_var ^ 1);
39900 if (one_var & 1)
39902 var = convert_modes (HImode, QImode, var, true);
39903 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39904 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39905 x = GEN_INT (INTVAL (x) & 0xff);
39907 else
39909 var = convert_modes (HImode, QImode, var, true);
39910 x = gen_int_mode (INTVAL (x) << 8, HImode);
39912 if (x != const0_rtx)
39913 var = expand_simple_binop (HImode, IOR, var, x, var,
39914 1, OPTAB_LIB_WIDEN);
39916 x = gen_reg_rtx (wmode);
39917 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39918 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39920 emit_move_insn (target, gen_lowpart (mode, x));
39921 return true;
39923 default:
39924 return false;
39927 emit_move_insn (target, const_vec);
39928 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39929 return true;
39932 /* A subroutine of ix86_expand_vector_init_general. Use vector
39933 concatenate to handle the most general case: all values variable,
39934 and none identical. */
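/* The vector is built bottom-up: e.g. eight scalar operands destined for a
V8SFmode vector are first paired into four V2SFmode registers, those into
two V4SFmode registers, and a final VEC_CONCAT produces the V8SFmode
result.  */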
39936 static void
39937 ix86_expand_vector_init_concat (enum machine_mode mode,
39938 rtx target, rtx *ops, int n)
39940 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39941 rtx first[16], second[8], third[4];
39942 rtvec v;
39943 int i, j;
39945 switch (n)
39947 case 2:
39948 switch (mode)
39950 case V16SImode:
39951 cmode = V8SImode;
39952 break;
39953 case V16SFmode:
39954 cmode = V8SFmode;
39955 break;
39956 case V8DImode:
39957 cmode = V4DImode;
39958 break;
39959 case V8DFmode:
39960 cmode = V4DFmode;
39961 break;
39962 case V8SImode:
39963 cmode = V4SImode;
39964 break;
39965 case V8SFmode:
39966 cmode = V4SFmode;
39967 break;
39968 case V4DImode:
39969 cmode = V2DImode;
39970 break;
39971 case V4DFmode:
39972 cmode = V2DFmode;
39973 break;
39974 case V4SImode:
39975 cmode = V2SImode;
39976 break;
39977 case V4SFmode:
39978 cmode = V2SFmode;
39979 break;
39980 case V2DImode:
39981 cmode = DImode;
39982 break;
39983 case V2SImode:
39984 cmode = SImode;
39985 break;
39986 case V2DFmode:
39987 cmode = DFmode;
39988 break;
39989 case V2SFmode:
39990 cmode = SFmode;
39991 break;
39992 default:
39993 gcc_unreachable ();
39996 if (!register_operand (ops[1], cmode))
39997 ops[1] = force_reg (cmode, ops[1]);
39998 if (!register_operand (ops[0], cmode))
39999 ops[0] = force_reg (cmode, ops[0]);
40000 emit_insn (gen_rtx_SET (VOIDmode, target,
40001 gen_rtx_VEC_CONCAT (mode, ops[0],
40002 ops[1])));
40003 break;
40005 case 4:
40006 switch (mode)
40008 case V4DImode:
40009 cmode = V2DImode;
40010 break;
40011 case V4DFmode:
40012 cmode = V2DFmode;
40013 break;
40014 case V4SImode:
40015 cmode = V2SImode;
40016 break;
40017 case V4SFmode:
40018 cmode = V2SFmode;
40019 break;
40020 default:
40021 gcc_unreachable ();
40023 goto half;
40025 case 8:
40026 switch (mode)
40028 case V8DImode:
40029 cmode = V2DImode;
40030 hmode = V4DImode;
40031 break;
40032 case V8DFmode:
40033 cmode = V2DFmode;
40034 hmode = V4DFmode;
40035 break;
40036 case V8SImode:
40037 cmode = V2SImode;
40038 hmode = V4SImode;
40039 break;
40040 case V8SFmode:
40041 cmode = V2SFmode;
40042 hmode = V4SFmode;
40043 break;
40044 default:
40045 gcc_unreachable ();
40047 goto half;
40049 case 16:
40050 switch (mode)
40052 case V16SImode:
40053 cmode = V2SImode;
40054 hmode = V4SImode;
40055 gmode = V8SImode;
40056 break;
40057 case V16SFmode:
40058 cmode = V2SFmode;
40059 hmode = V4SFmode;
40060 gmode = V8SFmode;
40061 break;
40062 default:
40063 gcc_unreachable ();
40065 goto half;
40067 half:
40068 /* FIXME: We process inputs backward to help RA. PR 36222. */
40069 i = n - 1;
40070 j = (n >> 1) - 1;
40071 for (; i > 0; i -= 2, j--)
40073 first[j] = gen_reg_rtx (cmode);
40074 v = gen_rtvec (2, ops[i - 1], ops[i]);
40075 ix86_expand_vector_init (false, first[j],
40076 gen_rtx_PARALLEL (cmode, v));
40079 n >>= 1;
40080 if (n > 4)
40082 gcc_assert (hmode != VOIDmode);
40083 gcc_assert (gmode != VOIDmode);
40084 for (i = j = 0; i < n; i += 2, j++)
40086 second[j] = gen_reg_rtx (hmode);
40087 ix86_expand_vector_init_concat (hmode, second [j],
40088 &first [i], 2);
40090 n >>= 1;
40091 for (i = j = 0; i < n; i += 2, j++)
40093 third[j] = gen_reg_rtx (gmode);
40094 ix86_expand_vector_init_concat (gmode, third[j],
40095 &second[i], 2);
40097 n >>= 1;
40098 ix86_expand_vector_init_concat (mode, target, third, n);
40100 else if (n > 2)
40102 gcc_assert (hmode != VOIDmode);
40103 for (i = j = 0; i < n; i += 2, j++)
40105 second[j] = gen_reg_rtx (hmode);
40106 ix86_expand_vector_init_concat (hmode, second [j],
40107 &first [i], 2);
40109 n >>= 1;
40110 ix86_expand_vector_init_concat (mode, target, second, n);
40112 else
40113 ix86_expand_vector_init_concat (mode, target, first, n);
40114 break;
40116 default:
40117 gcc_unreachable ();
40121 /* A subroutine of ix86_expand_vector_init_general. Use vector
40122 interleave to handle the most general case: all values variable,
40123 and none identical. */
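/* A hedged outline for V16QImode (the V8HImode path starts directly at the
   punpckldq round): each loop iteration below leaves { ops[2i], ops[2i+1] }
   in bytes 0-1 of a fresh register; those registers are then merged pairwise
   with punpcklwd, then punpckldq, then punpcklqdq, doubling the number of
   live elements at every round until all of them land in TARGET.  */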
40125 static void
40126 ix86_expand_vector_init_interleave (enum machine_mode mode,
40127 rtx target, rtx *ops, int n)
40129 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40130 int i, j;
40131 rtx op0, op1;
40132 rtx (*gen_load_even) (rtx, rtx, rtx);
40133 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40134 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40136 switch (mode)
40138 case V8HImode:
40139 gen_load_even = gen_vec_setv8hi;
40140 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40141 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40142 inner_mode = HImode;
40143 first_imode = V4SImode;
40144 second_imode = V2DImode;
40145 third_imode = VOIDmode;
40146 break;
40147 case V16QImode:
40148 gen_load_even = gen_vec_setv16qi;
40149 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40150 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40151 inner_mode = QImode;
40152 first_imode = V8HImode;
40153 second_imode = V4SImode;
40154 third_imode = V2DImode;
40155 break;
40156 default:
40157 gcc_unreachable ();
40160 for (i = 0; i < n; i++)
40162 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40163 op0 = gen_reg_rtx (SImode);
40164 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40166 /* Insert the SImode value as low element of V4SImode vector. */
40167 op1 = gen_reg_rtx (V4SImode);
40168 op0 = gen_rtx_VEC_MERGE (V4SImode,
40169 gen_rtx_VEC_DUPLICATE (V4SImode,
40170 op0),
40171 CONST0_RTX (V4SImode),
40172 const1_rtx);
40173 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40175 /* Cast the V4SImode vector back to a vector in the original mode. */
40176 op0 = gen_reg_rtx (mode);
40177 emit_move_insn (op0, gen_lowpart (mode, op1));
40179 /* Load even elements into the second position. */
40180 emit_insn (gen_load_even (op0,
40181 force_reg (inner_mode,
40182 ops [i + i + 1]),
40183 const1_rtx));
40185 /* Cast vector to FIRST_IMODE vector. */
40186 ops[i] = gen_reg_rtx (first_imode);
40187 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40190 /* Interleave low FIRST_IMODE vectors. */
40191 for (i = j = 0; i < n; i += 2, j++)
40193 op0 = gen_reg_rtx (first_imode);
40194 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40196 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40197 ops[j] = gen_reg_rtx (second_imode);
40198 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40201 /* Interleave low SECOND_IMODE vectors. */
40202 switch (second_imode)
40204 case V4SImode:
40205 for (i = j = 0; i < n / 2; i += 2, j++)
40207 op0 = gen_reg_rtx (second_imode);
40208 emit_insn (gen_interleave_second_low (op0, ops[i],
40209 ops[i + 1]));
40211 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40212 vector. */
40213 ops[j] = gen_reg_rtx (third_imode);
40214 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40216 second_imode = V2DImode;
40217 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40218 /* FALLTHRU */
40220 case V2DImode:
40221 op0 = gen_reg_rtx (second_imode);
40222 emit_insn (gen_interleave_second_low (op0, ops[0],
40223 ops[1]));
40225 /* Cast the SECOND_IMODE vector back to a vector in the original
40226 mode. */
40227 emit_insn (gen_rtx_SET (VOIDmode, target,
40228 gen_lowpart (mode, op0)));
40229 break;
40231 default:
40232 gcc_unreachable ();
40236 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40237 all values variable, and none identical. */
40239 static void
40240 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40241 rtx target, rtx vals)
40243 rtx ops[64], op0, op1;
40244 enum machine_mode half_mode = VOIDmode;
40245 int n, i;
40247 switch (mode)
40249 case V2SFmode:
40250 case V2SImode:
40251 if (!mmx_ok && !TARGET_SSE)
40252 break;
40253 /* FALLTHRU */
40255 case V16SImode:
40256 case V16SFmode:
40257 case V8DFmode:
40258 case V8DImode:
40259 case V8SFmode:
40260 case V8SImode:
40261 case V4DFmode:
40262 case V4DImode:
40263 case V4SFmode:
40264 case V4SImode:
40265 case V2DFmode:
40266 case V2DImode:
40267 n = GET_MODE_NUNITS (mode);
40268 for (i = 0; i < n; i++)
40269 ops[i] = XVECEXP (vals, 0, i);
40270 ix86_expand_vector_init_concat (mode, target, ops, n);
40271 return;
40273 case V32QImode:
40274 half_mode = V16QImode;
40275 goto half;
40277 case V16HImode:
40278 half_mode = V8HImode;
40279 goto half;
40281 half:
40282 n = GET_MODE_NUNITS (mode);
40283 for (i = 0; i < n; i++)
40284 ops[i] = XVECEXP (vals, 0, i);
40285 op0 = gen_reg_rtx (half_mode);
40286 op1 = gen_reg_rtx (half_mode);
40287 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40288 n >> 2);
40289 ix86_expand_vector_init_interleave (half_mode, op1,
40290 &ops [n >> 1], n >> 2);
40291 emit_insn (gen_rtx_SET (VOIDmode, target,
40292 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40293 return;
40295 case V16QImode:
40296 if (!TARGET_SSE4_1)
40297 break;
40298 /* FALLTHRU */
40300 case V8HImode:
40301 if (!TARGET_SSE2)
40302 break;
40304 /* Don't use ix86_expand_vector_init_interleave if we can't
40305 move from GPR to SSE register directly. */
40306 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40307 break;
40309 n = GET_MODE_NUNITS (mode);
40310 for (i = 0; i < n; i++)
40311 ops[i] = XVECEXP (vals, 0, i);
40312 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40313 return;
40315 case V4HImode:
40316 case V8QImode:
40317 break;
40319 default:
40320 gcc_unreachable ();
40324 int i, j, n_elts, n_words, n_elt_per_word;
40325 enum machine_mode inner_mode;
40326 rtx words[4], shift;
40328 inner_mode = GET_MODE_INNER (mode);
40329 n_elts = GET_MODE_NUNITS (mode);
40330 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40331 n_elt_per_word = n_elts / n_words;
40332 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40334 for (i = 0; i < n_words; ++i)
40336 rtx word = NULL_RTX;
40338 for (j = 0; j < n_elt_per_word; ++j)
40340 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40341 elt = convert_modes (word_mode, inner_mode, elt, true);
40343 if (j == 0)
40344 word = elt;
40345 else
40347 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40348 word, 1, OPTAB_LIB_WIDEN);
40349 word = expand_simple_binop (word_mode, IOR, word, elt,
40350 word, 1, OPTAB_LIB_WIDEN);
40354 words[i] = word;
40357 if (n_words == 1)
40358 emit_move_insn (target, gen_lowpart (mode, words[0]));
40359 else if (n_words == 2)
40361 rtx tmp = gen_reg_rtx (mode);
40362 emit_clobber (tmp);
40363 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40364 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40365 emit_move_insn (target, tmp);
40367 else if (n_words == 4)
40369 rtx tmp = gen_reg_rtx (V4SImode);
40370 gcc_assert (word_mode == SImode);
40371 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40372 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40373 emit_move_insn (target, gen_lowpart (mode, tmp));
40375 else
40376 gcc_unreachable ();
40380 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40381 instructions unless MMX_OK is true. */
40383 void
40384 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40386 enum machine_mode mode = GET_MODE (target);
40387 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40388 int n_elts = GET_MODE_NUNITS (mode);
40389 int n_var = 0, one_var = -1;
40390 bool all_same = true, all_const_zero = true;
40391 int i;
40392 rtx x;
40394 for (i = 0; i < n_elts; ++i)
40396 x = XVECEXP (vals, 0, i);
40397 if (!(CONST_INT_P (x)
40398 || GET_CODE (x) == CONST_DOUBLE
40399 || GET_CODE (x) == CONST_FIXED))
40400 n_var++, one_var = i;
40401 else if (x != CONST0_RTX (inner_mode))
40402 all_const_zero = false;
40403 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40404 all_same = false;
40407 /* Constants are best loaded from the constant pool. */
40408 if (n_var == 0)
40410 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40411 return;
40414 /* If all values are identical, broadcast the value. */
40415 if (all_same
40416 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40417 XVECEXP (vals, 0, 0)))
40418 return;
40420 /* Values where only one field is non-constant are best loaded from
40421 the pool and overwritten via move later. */
40422 if (n_var == 1)
40424 if (all_const_zero
40425 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40426 XVECEXP (vals, 0, one_var),
40427 one_var))
40428 return;
40430 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40431 return;
40434 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40437 void
40438 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40440 enum machine_mode mode = GET_MODE (target);
40441 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40442 enum machine_mode half_mode;
40443 bool use_vec_merge = false;
40444 rtx tmp;
40445 static rtx (*gen_extract[6][2]) (rtx, rtx)
40447 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40448 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40449 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40450 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40451 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40452 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40454 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40456 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40457 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40458 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40459 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40460 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40461 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40463 int i, j, n;
40465 switch (mode)
40467 case V2SFmode:
40468 case V2SImode:
40469 if (mmx_ok)
40471 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40472 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40473 if (elt == 0)
40474 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40475 else
40476 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40477 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40478 return;
40480 break;
40482 case V2DImode:
40483 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40484 if (use_vec_merge)
40485 break;
40487 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40488 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40489 if (elt == 0)
40490 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40491 else
40492 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40493 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40494 return;
40496 case V2DFmode:
40498 rtx op0, op1;
40500 /* For the two element vectors, we implement a VEC_CONCAT with
40501 the extraction of the other element. */
40503 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40504 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40506 if (elt == 0)
40507 op0 = val, op1 = tmp;
40508 else
40509 op0 = tmp, op1 = val;
40511 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40512 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40514 return;
40516 case V4SFmode:
40517 use_vec_merge = TARGET_SSE4_1;
40518 if (use_vec_merge)
40519 break;
40521 switch (elt)
40523 case 0:
40524 use_vec_merge = true;
40525 break;
40527 case 1:
40528 /* tmp = target = A B C D */
40529 tmp = copy_to_reg (target);
40530 /* target = A A B B */
40531 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40532 /* target = X A B B */
40533 ix86_expand_vector_set (false, target, val, 0);
40534 /* target = A X C D */
40535 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40536 const1_rtx, const0_rtx,
40537 GEN_INT (2+4), GEN_INT (3+4)));
40538 return;
40540 case 2:
40541 /* tmp = target = A B C D */
40542 tmp = copy_to_reg (target);
40543 /* tmp = X B C D */
40544 ix86_expand_vector_set (false, tmp, val, 0);
40545 /* target = A B X D */
40546 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40547 const0_rtx, const1_rtx,
40548 GEN_INT (0+4), GEN_INT (3+4)));
40549 return;
40551 case 3:
40552 /* tmp = target = A B C D */
40553 tmp = copy_to_reg (target);
40554 /* tmp = X B C D */
40555 ix86_expand_vector_set (false, tmp, val, 0);
40556 /* target = A B X D */
40557 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40558 const0_rtx, const1_rtx,
40559 GEN_INT (2+4), GEN_INT (0+4)));
40560 return;
40562 default:
40563 gcc_unreachable ();
40565 break;
40567 case V4SImode:
40568 use_vec_merge = TARGET_SSE4_1;
40569 if (use_vec_merge)
40570 break;
40572 /* Element 0 handled by vec_merge below. */
40573 if (elt == 0)
40575 use_vec_merge = true;
40576 break;
40579 if (TARGET_SSE2)
40581 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40582 store into element 0, then shuffle them back. */
40584 rtx order[4];
40586 order[0] = GEN_INT (elt);
40587 order[1] = const1_rtx;
40588 order[2] = const2_rtx;
40589 order[3] = GEN_INT (3);
40590 order[elt] = const0_rtx;
40592 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40593 order[1], order[2], order[3]));
40595 ix86_expand_vector_set (false, target, val, 0);
40597 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40598 order[1], order[2], order[3]));
40600 else
40602 /* For SSE1, we have to reuse the V4SF code. */
40603 rtx t = gen_reg_rtx (V4SFmode);
40604 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40605 emit_move_insn (target, gen_lowpart (mode, t));
40607 return;
40609 case V8HImode:
40610 use_vec_merge = TARGET_SSE2;
40611 break;
40612 case V4HImode:
40613 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40614 break;
40616 case V16QImode:
40617 use_vec_merge = TARGET_SSE4_1;
40618 break;
40620 case V8QImode:
40621 break;
40623 case V32QImode:
40624 half_mode = V16QImode;
40625 j = 0;
40626 n = 16;
40627 goto half;
40629 case V16HImode:
40630 half_mode = V8HImode;
40631 j = 1;
40632 n = 8;
40633 goto half;
40635 case V8SImode:
40636 half_mode = V4SImode;
40637 j = 2;
40638 n = 4;
40639 goto half;
40641 case V4DImode:
40642 half_mode = V2DImode;
40643 j = 3;
40644 n = 2;
40645 goto half;
40647 case V8SFmode:
40648 half_mode = V4SFmode;
40649 j = 4;
40650 n = 4;
40651 goto half;
40653 case V4DFmode:
40654 half_mode = V2DFmode;
40655 j = 5;
40656 n = 2;
40657 goto half;
40659 half:
40660 /* Compute offset. */
40661 i = elt / n;
40662 elt %= n;
40664 gcc_assert (i <= 1);
40666 /* Extract the half. */
40667 tmp = gen_reg_rtx (half_mode);
40668 emit_insn (gen_extract[j][i] (tmp, target));
40670 /* Put val in tmp at elt. */
40671 ix86_expand_vector_set (false, tmp, val, elt);
40673 /* Put it back. */
40674 emit_insn (gen_insert[j][i] (target, target, tmp));
40675 return;
40677 default:
40678 break;
40681 if (use_vec_merge)
40683 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40684 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40685 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40687 else
40689 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40691 emit_move_insn (mem, target);
40693 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40694 emit_move_insn (tmp, val);
40696 emit_move_insn (target, mem);
40700 void
40701 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40703 enum machine_mode mode = GET_MODE (vec);
40704 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40705 bool use_vec_extr = false;
40706 rtx tmp;
40708 switch (mode)
40710 case V2SImode:
40711 case V2SFmode:
40712 if (!mmx_ok)
40713 break;
40714 /* FALLTHRU */
40716 case V2DFmode:
40717 case V2DImode:
40718 use_vec_extr = true;
40719 break;
40721 case V4SFmode:
40722 use_vec_extr = TARGET_SSE4_1;
40723 if (use_vec_extr)
40724 break;
40726 switch (elt)
40728 case 0:
40729 tmp = vec;
40730 break;
40732 case 1:
40733 case 3:
40734 tmp = gen_reg_rtx (mode);
40735 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40736 GEN_INT (elt), GEN_INT (elt),
40737 GEN_INT (elt+4), GEN_INT (elt+4)));
40738 break;
40740 case 2:
40741 tmp = gen_reg_rtx (mode);
40742 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40743 break;
40745 default:
40746 gcc_unreachable ();
40748 vec = tmp;
40749 use_vec_extr = true;
40750 elt = 0;
40751 break;
40753 case V4SImode:
40754 use_vec_extr = TARGET_SSE4_1;
40755 if (use_vec_extr)
40756 break;
40758 if (TARGET_SSE2)
40760 switch (elt)
40762 case 0:
40763 tmp = vec;
40764 break;
40766 case 1:
40767 case 3:
40768 tmp = gen_reg_rtx (mode);
40769 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40770 GEN_INT (elt), GEN_INT (elt),
40771 GEN_INT (elt), GEN_INT (elt)));
40772 break;
40774 case 2:
40775 tmp = gen_reg_rtx (mode);
40776 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40777 break;
40779 default:
40780 gcc_unreachable ();
40782 vec = tmp;
40783 use_vec_extr = true;
40784 elt = 0;
40786 else
40788 /* For SSE1, we have to reuse the V4SF code. */
40789 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40790 gen_lowpart (V4SFmode, vec), elt);
40791 return;
40793 break;
40795 case V8HImode:
40796 use_vec_extr = TARGET_SSE2;
40797 break;
40798 case V4HImode:
40799 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40800 break;
40802 case V16QImode:
40803 use_vec_extr = TARGET_SSE4_1;
40804 break;
40806 case V8SFmode:
40807 if (TARGET_AVX)
40809 tmp = gen_reg_rtx (V4SFmode);
40810 if (elt < 4)
40811 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40812 else
40813 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40814 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40815 return;
40817 break;
40819 case V4DFmode:
40820 if (TARGET_AVX)
40822 tmp = gen_reg_rtx (V2DFmode);
40823 if (elt < 2)
40824 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40825 else
40826 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40827 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40828 return;
40830 break;
40832 case V32QImode:
40833 if (TARGET_AVX)
40835 tmp = gen_reg_rtx (V16QImode);
40836 if (elt < 16)
40837 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40838 else
40839 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40840 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40841 return;
40843 break;
40845 case V16HImode:
40846 if (TARGET_AVX)
40848 tmp = gen_reg_rtx (V8HImode);
40849 if (elt < 8)
40850 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40851 else
40852 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40853 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40854 return;
40856 break;
40858 case V8SImode:
40859 if (TARGET_AVX)
40861 tmp = gen_reg_rtx (V4SImode);
40862 if (elt < 4)
40863 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40864 else
40865 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40866 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40867 return;
40869 break;
40871 case V4DImode:
40872 if (TARGET_AVX)
40874 tmp = gen_reg_rtx (V2DImode);
40875 if (elt < 2)
40876 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40877 else
40878 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40879 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40880 return;
40882 break;
40884 case V16SFmode:
40885 tmp = gen_reg_rtx (V8SFmode);
40886 if (elt < 8)
40887 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40888 else
40889 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40890 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40891 return;
40893 case V8DFmode:
40894 tmp = gen_reg_rtx (V4DFmode);
40895 if (elt < 4)
40896 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40897 else
40898 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40899 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40900 return;
40902 case V16SImode:
40903 tmp = gen_reg_rtx (V8SImode);
40904 if (elt < 8)
40905 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40906 else
40907 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40908 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40909 return;
40911 case V8DImode:
40912 tmp = gen_reg_rtx (V4DImode);
40913 if (elt < 4)
40914 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40915 else
40916 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40917 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40918 return;
40920 case V8QImode:
40921 /* ??? Could extract the appropriate HImode element and shift. */
40922 default:
40923 break;
40926 if (use_vec_extr)
40928 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40929 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40931 /* Let the rtl optimizers know about the zero extension performed. */
40932 if (inner_mode == QImode || inner_mode == HImode)
40934 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40935 target = gen_lowpart (SImode, target);
40938 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40940 else
40942 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40944 emit_move_insn (mem, vec);
40946 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40947 emit_move_insn (target, tmp);
40951 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40952 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40953 The upper bits of DEST are undefined, though they shouldn't cause
40954 exceptions (some bits from src or all zeros are ok). */
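/* Hedged worked example: for a V4SImode SRC and I == 64, the V1TImode
   logical shift used below moves bits 32..63 (element 1) down into bits
   0..31, so a following binary operation on DEST and SRC combines elements
   0 and 1; the earlier I == 128 step had similarly moved elements 2 and 3
   down so the caller could combine them with elements 0 and 1.  */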
40956 static void
40957 emit_reduc_half (rtx dest, rtx src, int i)
40959 rtx tem, d = dest;
40960 switch (GET_MODE (src))
40962 case V4SFmode:
40963 if (i == 128)
40964 tem = gen_sse_movhlps (dest, src, src);
40965 else
40966 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40967 GEN_INT (1 + 4), GEN_INT (1 + 4));
40968 break;
40969 case V2DFmode:
40970 tem = gen_vec_interleave_highv2df (dest, src, src);
40971 break;
40972 case V16QImode:
40973 case V8HImode:
40974 case V4SImode:
40975 case V2DImode:
40976 d = gen_reg_rtx (V1TImode);
40977 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
40978 GEN_INT (i / 2));
40979 break;
40980 case V8SFmode:
40981 if (i == 256)
40982 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
40983 else
40984 tem = gen_avx_shufps256 (dest, src, src,
40985 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
40986 break;
40987 case V4DFmode:
40988 if (i == 256)
40989 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
40990 else
40991 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
40992 break;
40993 case V32QImode:
40994 case V16HImode:
40995 case V8SImode:
40996 case V4DImode:
40997 if (i == 256)
40999 if (GET_MODE (dest) != V4DImode)
41000 d = gen_reg_rtx (V4DImode);
41001 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41002 gen_lowpart (V4DImode, src),
41003 const1_rtx);
41005 else
41007 d = gen_reg_rtx (V2TImode);
41008 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41009 GEN_INT (i / 2));
41011 break;
41012 case V16SImode:
41013 case V16SFmode:
41014 case V8DImode:
41015 case V8DFmode:
41016 if (i > 128)
41017 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41018 gen_lowpart (V16SImode, src),
41019 gen_lowpart (V16SImode, src),
41020 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41021 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41022 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41023 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41024 GEN_INT (0xC), GEN_INT (0xD),
41025 GEN_INT (0xE), GEN_INT (0xF),
41026 GEN_INT (0x10), GEN_INT (0x11),
41027 GEN_INT (0x12), GEN_INT (0x13),
41028 GEN_INT (0x14), GEN_INT (0x15),
41029 GEN_INT (0x16), GEN_INT (0x17));
41030 else
41031 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41032 gen_lowpart (V16SImode, src),
41033 GEN_INT (i == 128 ? 0x2 : 0x1),
41034 GEN_INT (0x3),
41035 GEN_INT (0x3),
41036 GEN_INT (0x3),
41037 GEN_INT (i == 128 ? 0x6 : 0x5),
41038 GEN_INT (0x7),
41039 GEN_INT (0x7),
41040 GEN_INT (0x7),
41041 GEN_INT (i == 128 ? 0xA : 0x9),
41042 GEN_INT (0xB),
41043 GEN_INT (0xB),
41044 GEN_INT (0xB),
41045 GEN_INT (i == 128 ? 0xE : 0xD),
41046 GEN_INT (0xF),
41047 GEN_INT (0xF),
41048 GEN_INT (0xF));
41049 break;
41050 default:
41051 gcc_unreachable ();
41053 emit_insn (tem);
41054 if (d != dest)
41055 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41058 /* Expand a vector reduction. FN is the binary pattern to reduce;
41059 DEST is the destination; IN is the input vector. */
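/* A hedged sketch of the halving loop below for a V4SImode maximum
   reduction of v = { v0, v1, v2, v3 }, with "max" standing in for FN and
   X marking don't-care lanes:
     half = { v2, v3, X, X };	i == 128: upper half moved down
     t    = max (half, v);
     half = { t[1], X, X, X };	i == 64:  element 1 moved down
     dest = max (half, t);	the reduction ends up in element 0.  */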
41061 void
41062 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41064 rtx half, dst, vec = in;
41065 enum machine_mode mode = GET_MODE (in);
41066 int i;
41068 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41069 if (TARGET_SSE4_1
41070 && mode == V8HImode
41071 && fn == gen_uminv8hi3)
41073 emit_insn (gen_sse4_1_phminposuw (dest, in));
41074 return;
41077 for (i = GET_MODE_BITSIZE (mode);
41078 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41079 i >>= 1)
41081 half = gen_reg_rtx (mode);
41082 emit_reduc_half (half, vec, i);
41083 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41084 dst = dest;
41085 else
41086 dst = gen_reg_rtx (mode);
41087 emit_insn (fn (dst, half, vec));
41088 vec = dst;
41092 /* Target hook for scalar_mode_supported_p. */
41093 static bool
41094 ix86_scalar_mode_supported_p (enum machine_mode mode)
41096 if (DECIMAL_FLOAT_MODE_P (mode))
41097 return default_decimal_float_supported_p ();
41098 else if (mode == TFmode)
41099 return true;
41100 else
41101 return default_scalar_mode_supported_p (mode);
41104 /* Implements target hook vector_mode_supported_p. */
41105 static bool
41106 ix86_vector_mode_supported_p (enum machine_mode mode)
41108 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41109 return true;
41110 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41111 return true;
41112 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41113 return true;
41114 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41115 return true;
41116 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41117 return true;
41118 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41119 return true;
41120 return false;
41123 /* Target hook for c_mode_for_suffix. */
41124 static enum machine_mode
41125 ix86_c_mode_for_suffix (char suffix)
41127 if (suffix == 'q')
41128 return TFmode;
41129 if (suffix == 'w')
41130 return XFmode;
41132 return VOIDmode;
41135 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41137 We do this in the new i386 backend to maintain source compatibility
41138 with the old cc0-based compiler. */
41140 static tree
41141 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
41142 tree inputs ATTRIBUTE_UNUSED,
41143 tree clobbers)
41145 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41146 clobbers);
41147 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41148 clobbers);
41149 return clobbers;
41152 /* Implements the target hook targetm.asm.encode_section_info. */
41154 static void ATTRIBUTE_UNUSED
41155 ix86_encode_section_info (tree decl, rtx rtl, int first)
41157 default_encode_section_info (decl, rtl, first);
41159 if (TREE_CODE (decl) == VAR_DECL
41160 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41161 && ix86_in_large_data_p (decl))
41162 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41165 /* Worker function for REVERSE_CONDITION. */
41167 enum rtx_code
41168 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41170 return (mode != CCFPmode && mode != CCFPUmode
41171 ? reverse_condition (code)
41172 : reverse_condition_maybe_unordered (code));
41175 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41176 to OPERANDS[0]. */
41178 const char *
41179 output_387_reg_move (rtx insn, rtx *operands)
41181 if (REG_P (operands[0]))
41183 if (REG_P (operands[1])
41184 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41186 if (REGNO (operands[0]) == FIRST_STACK_REG)
41187 return output_387_ffreep (operands, 0);
41188 return "fstp\t%y0";
41190 if (STACK_TOP_P (operands[0]))
41191 return "fld%Z1\t%y1";
41192 return "fst\t%y0";
41194 else if (MEM_P (operands[0]))
41196 gcc_assert (REG_P (operands[1]));
41197 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41198 return "fstp%Z0\t%y0";
41199 else
41201 /* There is no non-popping store to memory for XFmode.
41202 So if we need one, follow the store with a load. */
41203 if (GET_MODE (operands[0]) == XFmode)
41204 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41205 else
41206 return "fst%Z0\t%y0";
41209 else
41210 gcc_unreachable ();
41213 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41214 FP status register is set. */
41216 void
41217 ix86_emit_fp_unordered_jump (rtx label)
41219 rtx reg = gen_reg_rtx (HImode);
41220 rtx temp;
41222 emit_insn (gen_x86_fnstsw_1 (reg));
41224 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41226 emit_insn (gen_x86_sahf_1 (reg));
41228 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41229 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41231 else
41233 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41235 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41236 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41239 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41240 gen_rtx_LABEL_REF (VOIDmode, label),
41241 pc_rtx);
41242 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41244 emit_jump_insn (temp);
41245 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41248 /* Output code to perform a log1p XFmode calculation. */
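/* A hedged outline of the sequence below: the threshold constant is
   1 - sqrt(2)/2, the documented validity bound for fyl2xp1.  For |op1|
   below that bound we compute
     op0 = ln (2) * log2 (1 + op1)	(fldln2; fyl2xp1)
   which preserves precision for tiny op1; otherwise we use
     op0 = ln (2) * log2 (op1 + 1.0)	(explicit add; fldln2; fyl2x)
   where the explicit addition loses nothing of significance.  */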
41250 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41252 rtx label1 = gen_label_rtx ();
41253 rtx label2 = gen_label_rtx ();
41255 rtx tmp = gen_reg_rtx (XFmode);
41256 rtx tmp2 = gen_reg_rtx (XFmode);
41257 rtx test;
41259 emit_insn (gen_absxf2 (tmp, op1));
41260 test = gen_rtx_GE (VOIDmode, tmp,
41261 CONST_DOUBLE_FROM_REAL_VALUE (
41262 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41263 XFmode));
41264 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41266 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41267 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41268 emit_jump (label2);
41270 emit_label (label1);
41271 emit_move_insn (tmp, CONST1_RTX (XFmode));
41272 emit_insn (gen_addxf3 (tmp, op1, tmp));
41273 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41274 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41276 emit_label (label2);
41279 /* Emit code for round calculation. */
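/* Hedged note with a worked example: the expansion below computes
   round(a) = sgn(a) * floor(|a| + 0.5), i.e. halfway cases round away from
   zero; e.g. round (-2.5) -> sgn = -1, floor (2.5 + 0.5) = 3, result -3.  */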
41280 void ix86_emit_i387_round (rtx op0, rtx op1)
41282 enum machine_mode inmode = GET_MODE (op1);
41283 enum machine_mode outmode = GET_MODE (op0);
41284 rtx e1, e2, res, tmp, tmp1, half;
41285 rtx scratch = gen_reg_rtx (HImode);
41286 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41287 rtx jump_label = gen_label_rtx ();
41288 rtx insn;
41289 rtx (*gen_abs) (rtx, rtx);
41290 rtx (*gen_neg) (rtx, rtx);
41292 switch (inmode)
41294 case SFmode:
41295 gen_abs = gen_abssf2;
41296 break;
41297 case DFmode:
41298 gen_abs = gen_absdf2;
41299 break;
41300 case XFmode:
41301 gen_abs = gen_absxf2;
41302 break;
41303 default:
41304 gcc_unreachable ();
41307 switch (outmode)
41309 case SFmode:
41310 gen_neg = gen_negsf2;
41311 break;
41312 case DFmode:
41313 gen_neg = gen_negdf2;
41314 break;
41315 case XFmode:
41316 gen_neg = gen_negxf2;
41317 break;
41318 case HImode:
41319 gen_neg = gen_neghi2;
41320 break;
41321 case SImode:
41322 gen_neg = gen_negsi2;
41323 break;
41324 case DImode:
41325 gen_neg = gen_negdi2;
41326 break;
41327 default:
41328 gcc_unreachable ();
41331 e1 = gen_reg_rtx (inmode);
41332 e2 = gen_reg_rtx (inmode);
41333 res = gen_reg_rtx (outmode);
41335 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41337 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
41339 /* scratch = fxam(op1) */
41340 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41341 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41342 UNSPEC_FXAM)));
41343 /* e1 = fabs(op1) */
41344 emit_insn (gen_abs (e1, op1));
41346 /* e2 = e1 + 0.5 */
41347 half = force_reg (inmode, half);
41348 emit_insn (gen_rtx_SET (VOIDmode, e2,
41349 gen_rtx_PLUS (inmode, e1, half)));
41351 /* res = floor(e2) */
41352 if (inmode != XFmode)
41354 tmp1 = gen_reg_rtx (XFmode);
41356 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41357 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41359 else
41360 tmp1 = e2;
41362 switch (outmode)
41364 case SFmode:
41365 case DFmode:
41367 rtx tmp0 = gen_reg_rtx (XFmode);
41369 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41371 emit_insn (gen_rtx_SET (VOIDmode, res,
41372 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41373 UNSPEC_TRUNC_NOOP)));
41375 break;
41376 case XFmode:
41377 emit_insn (gen_frndintxf2_floor (res, tmp1));
41378 break;
41379 case HImode:
41380 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41381 break;
41382 case SImode:
41383 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41384 break;
41385 case DImode:
41386 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41387 break;
41388 default:
41389 gcc_unreachable ();
41392 /* flags = signbit(a) */
41393 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41395 /* if (flags) then res = -res */
41396 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41397 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41398 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41399 pc_rtx);
41400 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41401 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41402 JUMP_LABEL (insn) = jump_label;
41404 emit_insn (gen_neg (res, res));
41406 emit_label (jump_label);
41407 LABEL_NUSES (jump_label) = 1;
41409 emit_move_insn (op0, res);
41412 /* Output code to perform a Newton-Raphson approximation of a single precision
41413 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41415 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41417 rtx x0, x1, e0, e1;
41419 x0 = gen_reg_rtx (mode);
41420 e0 = gen_reg_rtx (mode);
41421 e1 = gen_reg_rtx (mode);
41422 x1 = gen_reg_rtx (mode);
41424 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
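  /* Hedged derivation of the identity above: with x0 = rcp(b), the hardware
     reciprocal estimate, one Newton-Raphson refinement of 1/b is
       x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0)
     which is exactly e1 - e0 below; the final multiply by A then yields the
     approximate quotient a / b.  */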
41426 b = force_reg (mode, b);
41428 /* x0 = rcp(b) estimate */
41429 if (mode == V16SFmode || mode == V8DFmode)
41430 emit_insn (gen_rtx_SET (VOIDmode, x0,
41431 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41432 UNSPEC_RCP14)));
41433 else
41434 emit_insn (gen_rtx_SET (VOIDmode, x0,
41435 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41436 UNSPEC_RCP)));
41438 /* e0 = x0 * b */
41439 emit_insn (gen_rtx_SET (VOIDmode, e0,
41440 gen_rtx_MULT (mode, x0, b)));
41442 /* e0 = x0 * e0 */
41443 emit_insn (gen_rtx_SET (VOIDmode, e0,
41444 gen_rtx_MULT (mode, x0, e0)));
41446 /* e1 = x0 + x0 */
41447 emit_insn (gen_rtx_SET (VOIDmode, e1,
41448 gen_rtx_PLUS (mode, x0, x0)));
41450 /* x1 = e1 - e0 */
41451 emit_insn (gen_rtx_SET (VOIDmode, x1,
41452 gen_rtx_MINUS (mode, e1, e0)));
41454 /* res = a * x1 */
41455 emit_insn (gen_rtx_SET (VOIDmode, res,
41456 gen_rtx_MULT (mode, a, x1)));
41459 /* Output code to perform a Newton-Raphson approximation of a
41460 single precision floating point [reciprocal] square root. */
41462 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41463 bool recip)
41465 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41466 REAL_VALUE_TYPE r;
41467 int unspec;
41469 x0 = gen_reg_rtx (mode);
41470 e0 = gen_reg_rtx (mode);
41471 e1 = gen_reg_rtx (mode);
41472 e2 = gen_reg_rtx (mode);
41473 e3 = gen_reg_rtx (mode);
41475 real_from_integer (&r, VOIDmode, -3, -1, 0);
41476 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41478 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41479 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41480 unspec = UNSPEC_RSQRT;
41482 if (VECTOR_MODE_P (mode))
41484 mthree = ix86_build_const_vector (mode, true, mthree);
41485 mhalf = ix86_build_const_vector (mode, true, mhalf);
41486 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41487 if (GET_MODE_SIZE (mode) == 64)
41488 unspec = UNSPEC_RSQRT14;
41491 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41492 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
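  /* Hedged derivation of the identities above: with x0 = rsqrtss(a), one
     Newton-Raphson refinement of 1/sqrt(a) is
       x1 = x0 * (1.5 - 0.5 * a * x0 * x0) = -0.5 * x0 * (a * x0 * x0 - 3.0)
     which is the RECIP variant; multiplying once more by A converts the
     refined reciprocal square root into sqrt(a), the non-RECIP variant.  */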
41494 a = force_reg (mode, a);
41496 /* x0 = rsqrt(a) estimate */
41497 emit_insn (gen_rtx_SET (VOIDmode, x0,
41498 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41499 unspec)));
41501 /* If a == 0.0, filter out the infinite estimate to prevent NaN for sqrt(0.0). */
41502 if (!recip)
41504 rtx zero, mask;
41506 zero = gen_reg_rtx (mode);
41507 mask = gen_reg_rtx (mode);
41509 zero = force_reg (mode, CONST0_RTX(mode));
41511 /* Handle masked compare. */
41512 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41514 mask = gen_reg_rtx (HImode);
41515 /* Imm value 0x4 corresponds to not-equal comparison. */
41516 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41517 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41519 else
41521 emit_insn (gen_rtx_SET (VOIDmode, mask,
41522 gen_rtx_NE (mode, zero, a)));
41524 emit_insn (gen_rtx_SET (VOIDmode, x0,
41525 gen_rtx_AND (mode, x0, mask)));
41529 /* e0 = x0 * a */
41530 emit_insn (gen_rtx_SET (VOIDmode, e0,
41531 gen_rtx_MULT (mode, x0, a)));
41532 /* e1 = e0 * x0 */
41533 emit_insn (gen_rtx_SET (VOIDmode, e1,
41534 gen_rtx_MULT (mode, e0, x0)));
41536 /* e2 = e1 - 3. */
41537 mthree = force_reg (mode, mthree);
41538 emit_insn (gen_rtx_SET (VOIDmode, e2,
41539 gen_rtx_PLUS (mode, e1, mthree)));
41541 mhalf = force_reg (mode, mhalf);
41542 if (recip)
41543 /* e3 = -.5 * x0 */
41544 emit_insn (gen_rtx_SET (VOIDmode, e3,
41545 gen_rtx_MULT (mode, x0, mhalf)));
41546 else
41547 /* e3 = -.5 * e0 */
41548 emit_insn (gen_rtx_SET (VOIDmode, e3,
41549 gen_rtx_MULT (mode, e0, mhalf)));
41550 /* ret = e2 * e3 */
41551 emit_insn (gen_rtx_SET (VOIDmode, res,
41552 gen_rtx_MULT (mode, e2, e3)));
41555 #ifdef TARGET_SOLARIS
41556 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41558 static void
41559 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41560 tree decl)
41562 /* With Binutils 2.15, the "@unwind" marker must be specified on
41563 every occurrence of the ".eh_frame" section, not just the first
41564 one. */
41565 if (TARGET_64BIT
41566 && strcmp (name, ".eh_frame") == 0)
41568 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41569 flags & SECTION_WRITE ? "aw" : "a");
41570 return;
41573 #ifndef USE_GAS
41574 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41576 solaris_elf_asm_comdat_section (name, flags, decl);
41577 return;
41579 #endif
41581 default_elf_asm_named_section (name, flags, decl);
41583 #endif /* TARGET_SOLARIS */
41585 /* Return the mangling of TYPE if it is an extended fundamental type. */
41587 static const char *
41588 ix86_mangle_type (const_tree type)
41590 type = TYPE_MAIN_VARIANT (type);
41592 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41593 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41594 return NULL;
41596 switch (TYPE_MODE (type))
41598 case TFmode:
41599 /* __float128 is "g". */
41600 return "g";
41601 case XFmode:
41602 /* "long double" or __float80 is "e". */
41603 return "e";
41604 default:
41605 return NULL;
41609 /* For 32-bit code we can save PIC register setup by using
41610 __stack_chk_fail_local hidden function instead of calling
41611 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
41612 register, so it is better to call __stack_chk_fail directly. */
41614 static tree ATTRIBUTE_UNUSED
41615 ix86_stack_protect_fail (void)
41617 return TARGET_64BIT
41618 ? default_external_stack_protect_fail ()
41619 : default_hidden_stack_protect_fail ();
41622 /* Select a format to encode pointers in exception handling data. CODE
41623 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41624 true if the symbol may be affected by dynamic relocations.
41626 ??? All x86 object file formats are capable of representing this.
41627 After all, the relocation needed is the same as for the call insn.
41628 Whether or not a particular assembler allows us to enter such, I
41629 guess we'll have to see. */
41631 asm_preferred_eh_data_format (int code, int global)
41633 if (flag_pic)
41635 int type = DW_EH_PE_sdata8;
41636 if (!TARGET_64BIT
41637 || ix86_cmodel == CM_SMALL_PIC
41638 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41639 type = DW_EH_PE_sdata4;
41640 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41642 if (ix86_cmodel == CM_SMALL
41643 || (ix86_cmodel == CM_MEDIUM && code))
41644 return DW_EH_PE_udata4;
41645 return DW_EH_PE_absptr;
41648 /* Expand copysign from SIGN to the positive value ABS_VALUE
41649 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41650 the sign-bit. */
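/* Hedged summary of the bit twiddling below: with the mask reduced to just
   the sign bit, the result is
     result = abs_value | (sign & sign_bit_mask)
   i.e. a branch-free copysign; when MASK is passed in it typically came
   from ix86_expand_sse_fabs and is the inverted mask, hence the
   gen_rtx_NOT in that path.  */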
41651 static void
41652 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41654 enum machine_mode mode = GET_MODE (sign);
41655 rtx sgn = gen_reg_rtx (mode);
41656 if (mask == NULL_RTX)
41658 enum machine_mode vmode;
41660 if (mode == SFmode)
41661 vmode = V4SFmode;
41662 else if (mode == DFmode)
41663 vmode = V2DFmode;
41664 else
41665 vmode = mode;
41667 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41668 if (!VECTOR_MODE_P (mode))
41670 /* We need to generate a scalar mode mask in this case. */
41671 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41672 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41673 mask = gen_reg_rtx (mode);
41674 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41677 else
41678 mask = gen_rtx_NOT (mode, mask);
41679 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41680 gen_rtx_AND (mode, mask, sign)));
41681 emit_insn (gen_rtx_SET (VOIDmode, result,
41682 gen_rtx_IOR (mode, abs_value, sgn)));
41685 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41686 mask for masking out the sign-bit is stored in *SMASK, if that is
41687 non-null. */
41688 static rtx
41689 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41691 enum machine_mode vmode, mode = GET_MODE (op0);
41692 rtx xa, mask;
41694 xa = gen_reg_rtx (mode);
41695 if (mode == SFmode)
41696 vmode = V4SFmode;
41697 else if (mode == DFmode)
41698 vmode = V2DFmode;
41699 else
41700 vmode = mode;
41701 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41702 if (!VECTOR_MODE_P (mode))
41704 /* We need to generate a scalar mode mask in this case. */
41705 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41706 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41707 mask = gen_reg_rtx (mode);
41708 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41710 emit_insn (gen_rtx_SET (VOIDmode, xa,
41711 gen_rtx_AND (mode, op0, mask)));
41713 if (smask)
41714 *smask = mask;
41716 return xa;
41719 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41720 swapping the operands if SWAP_OPERANDS is true. The expanded
41721 code is a forward jump to a newly created label in case the
41722 comparison is true. The generated label rtx is returned. */
41723 static rtx
41724 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41725 bool swap_operands)
41727 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41728 rtx label, tmp;
41730 if (swap_operands)
41732 tmp = op0;
41733 op0 = op1;
41734 op1 = tmp;
41737 label = gen_label_rtx ();
41738 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41739 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41740 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41741 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41742 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41743 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41744 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41745 JUMP_LABEL (tmp) = label;
41747 return label;
41750 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41751 using comparison code CODE. Operands are swapped for the comparison if
41752 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41753 static rtx
41754 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41755 bool swap_operands)
41757 rtx (*insn)(rtx, rtx, rtx, rtx);
41758 enum machine_mode mode = GET_MODE (op0);
41759 rtx mask = gen_reg_rtx (mode);
41761 if (swap_operands)
41763 rtx tmp = op0;
41764 op0 = op1;
41765 op1 = tmp;
41768 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41770 emit_insn (insn (mask, op0, op1,
41771 gen_rtx_fmt_ee (code, mode, op0, op1)));
41772 return mask;
41775 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41776 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
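/* Hedged rationale: 2**52 for DFmode (2**23 for SFmode) is the magnitude at
   which the significand can no longer hold a fractional bit, so every value
   of at least that size is already an integer.  The rounding expanders below
   both bail out early when |x| >= TWO52 and rely on the classic identity
     (x + TWO52) - TWO52 == rint (x)	for 0 <= x < TWO52
   under the default round-to-nearest mode.  */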
41777 static rtx
41778 ix86_gen_TWO52 (enum machine_mode mode)
41780 REAL_VALUE_TYPE TWO52r;
41781 rtx TWO52;
41783 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41784 TWO52 = const_double_from_real_value (TWO52r, mode);
41785 TWO52 = force_reg (mode, TWO52);
41787 return TWO52;
41790 /* Expand SSE sequence for computing lround from OP1 storing
41791 into OP0. */
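/* Hedged note on the constant chosen below: adding a plain 0.5 would lift
   0.49999999999999994 (the largest double below 0.5) up to exactly 1.0 after
   the round-to-nearest addition, so the sequence adds nextafter (0.5, 0.0)
   instead; with that adjustment the sum stays below 1.0 and the final
   conversion correctly yields lround == 0 for such inputs.  */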
41792 void
41793 ix86_expand_lround (rtx op0, rtx op1)
41795 /* C code for the stuff we're doing below:
41796 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41797 return (long)tmp;
41799 enum machine_mode mode = GET_MODE (op1);
41800 const struct real_format *fmt;
41801 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41802 rtx adj;
41804 /* load nextafter (0.5, 0.0) */
41805 fmt = REAL_MODE_FORMAT (mode);
41806 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41807 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41809 /* adj = copysign (0.5, op1) */
41810 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41811 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41813 /* adj = op1 + adj */
41814 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41816 /* op0 = (imode)adj */
41817 expand_fix (op0, adj, 0);
41820 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
41821 into OPERAND0. */
41822 void
41823 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41825 /* C code for the stuff we're doing below (for do_floor):
41826 xi = (long)op1;
41827 xi -= (double)xi > op1 ? 1 : 0;
41828 return xi;
41830 enum machine_mode fmode = GET_MODE (op1);
41831 enum machine_mode imode = GET_MODE (op0);
41832 rtx ireg, freg, label, tmp;
41834 /* reg = (long)op1 */
41835 ireg = gen_reg_rtx (imode);
41836 expand_fix (ireg, op1, 0);
41838 /* freg = (double)reg */
41839 freg = gen_reg_rtx (fmode);
41840 expand_float (freg, ireg, 0);
41842 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41843 label = ix86_expand_sse_compare_and_jump (UNLE,
41844 freg, op1, !do_floor);
41845 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41846 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41847 emit_move_insn (ireg, tmp);
41849 emit_label (label);
41850 LABEL_NUSES (label) = 1;
41852 emit_move_insn (op0, ireg);
41855 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41856 result in OPERAND0. */
41857 void
41858 ix86_expand_rint (rtx operand0, rtx operand1)
41860 /* C code for the stuff we're doing below:
41861 xa = fabs (operand1);
41862 if (!isless (xa, 2**52))
41863 return operand1;
41864 xa = xa + 2**52 - 2**52;
41865 return copysign (xa, operand1);
41867 enum machine_mode mode = GET_MODE (operand0);
41868 rtx res, xa, label, TWO52, mask;
41870 res = gen_reg_rtx (mode);
41871 emit_move_insn (res, operand1);
41873 /* xa = abs (operand1) */
41874 xa = ix86_expand_sse_fabs (res, &mask);
41876 /* if (!isless (xa, TWO52)) goto label; */
41877 TWO52 = ix86_gen_TWO52 (mode);
41878 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41880 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41881 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41883 ix86_sse_copysign_to_positive (res, xa, res, mask);
41885 emit_label (label);
41886 LABEL_NUSES (label) = 1;
41888 emit_move_insn (operand0, res);
41891 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41892 into OPERAND0. */
41893 void
41894 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41896 /* C code for the stuff we expand below.
41897 double xa = fabs (x), x2;
41898 if (!isless (xa, TWO52))
41899 return x;
41900 xa = xa + TWO52 - TWO52;
41901 x2 = copysign (xa, x);
41902 Compensate. Floor:
41903 if (x2 > x)
41904 x2 -= 1;
41905 Compensate. Ceil:
41906 if (x2 < x)
41907 x2 -= -1;
41908 return x2;
41910 enum machine_mode mode = GET_MODE (operand0);
41911 rtx xa, TWO52, tmp, label, one, res, mask;
41913 TWO52 = ix86_gen_TWO52 (mode);
41915 /* Temporary for holding the result, initialized to the input
41916 operand to ease control flow. */
41917 res = gen_reg_rtx (mode);
41918 emit_move_insn (res, operand1);
41920 /* xa = abs (operand1) */
41921 xa = ix86_expand_sse_fabs (res, &mask);
41923 /* if (!isless (xa, TWO52)) goto label; */
41924 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41926 /* xa = xa + TWO52 - TWO52; */
41927 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41928 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41930 /* xa = copysign (xa, operand1) */
41931 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41933 /* generate 1.0 or -1.0 */
41934 one = force_reg (mode,
41935 const_double_from_real_value (do_floor
41936 ? dconst1 : dconstm1, mode));
41938 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41939 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41940 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41941 gen_rtx_AND (mode, one, tmp)));
41942 /* We always need to subtract here to preserve signed zero. */
41943 tmp = expand_simple_binop (mode, MINUS,
41944 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41945 emit_move_insn (res, tmp);
41947 emit_label (label);
41948 LABEL_NUSES (label) = 1;
41950 emit_move_insn (operand0, res);
41953 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41954 into OPERAND0. */
41955 void
41956 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41958 /* C code for the stuff we expand below.
41959 double xa = fabs (x), x2;
41960 if (!isless (xa, TWO52))
41961 return x;
41962 x2 = (double)(long)x;
41963 Compensate. Floor:
41964 if (x2 > x)
41965 x2 -= 1;
41966 Compensate. Ceil:
41967 if (x2 < x)
41968 x2 += 1;
41969 if (HONOR_SIGNED_ZEROS (mode))
41970 return copysign (x2, x);
41971 return x2;
41973 enum machine_mode mode = GET_MODE (operand0);
41974 rtx xa, xi, TWO52, tmp, label, one, res, mask;
41976 TWO52 = ix86_gen_TWO52 (mode);
41978 /* Temporary for holding the result, initialized to the input
41979 operand to ease control flow. */
41980 res = gen_reg_rtx (mode);
41981 emit_move_insn (res, operand1);
41983 /* xa = abs (operand1) */
41984 xa = ix86_expand_sse_fabs (res, &mask);
41986 /* if (!isless (xa, TWO52)) goto label; */
41987 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41989 /* xa = (double)(long)x */
41990 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
41991 expand_fix (xi, res, 0);
41992 expand_float (xa, xi, 0);
41994 /* generate 1.0 */
41995 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
41997 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41998 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41999 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42000 gen_rtx_AND (mode, one, tmp)));
42001 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42002 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42003 emit_move_insn (res, tmp);
42005 if (HONOR_SIGNED_ZEROS (mode))
42006 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42008 emit_label (label);
42009 LABEL_NUSES (label) = 1;
42011 emit_move_insn (operand0, res);
42014 /* Expand SSE sequence for computing round from OPERAND1 storing
42015 into OPERAND0. Sequence that works without relying on DImode truncation
42016 via cvttsd2siq, which is only available on 64-bit targets. */
42017 void
42018 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42020 /* C code for the stuff we expand below.
42021 double xa = fabs (x), xa2, x2;
42022 if (!isless (xa, TWO52))
42023 return x;
42024 Using the absolute value and copying back sign makes
42025 -0.0 -> -0.0 correct.
42026 xa2 = xa + TWO52 - TWO52;
42027 Compensate.
42028 dxa = xa2 - xa;
42029 if (dxa <= -0.5)
42030 xa2 += 1;
42031 else if (dxa > 0.5)
42032 xa2 -= 1;
42033 x2 = copysign (xa2, x);
42034 return x2;
42036 enum machine_mode mode = GET_MODE (operand0);
42037 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
42039 TWO52 = ix86_gen_TWO52 (mode);
42041 /* Temporary for holding the result, initialized to the input
42042 operand to ease control flow. */
42043 res = gen_reg_rtx (mode);
42044 emit_move_insn (res, operand1);
42046 /* xa = abs (operand1) */
42047 xa = ix86_expand_sse_fabs (res, &mask);
42049 /* if (!isless (xa, TWO52)) goto label; */
42050 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42052 /* xa2 = xa + TWO52 - TWO52; */
42053 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42054 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42056 /* dxa = xa2 - xa; */
42057 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42059 /* generate 0.5, 1.0 and -0.5 */
42060 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42061 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42062 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42063 0, OPTAB_DIRECT);
42065 /* Compensate. */
42066 tmp = gen_reg_rtx (mode);
42067 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42068 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42069 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42070 gen_rtx_AND (mode, one, tmp)));
42071 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42072 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42073 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42074 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42075 gen_rtx_AND (mode, one, tmp)));
42076 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42078 /* res = copysign (xa2, operand1) */
42079 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42081 emit_label (label);
42082 LABEL_NUSES (label) = 1;
42084 emit_move_insn (operand0, res);
42087 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42088 into OPERAND0. */
42089 void
42090 ix86_expand_trunc (rtx operand0, rtx operand1)
42092 /* C code for SSE variant we expand below.
42093 double xa = fabs (x), x2;
42094 if (!isless (xa, TWO52))
42095 return x;
42096 x2 = (double)(long)x;
42097 if (HONOR_SIGNED_ZEROS (mode))
42098 return copysign (x2, x);
42099 return x2;
42101 enum machine_mode mode = GET_MODE (operand0);
42102 rtx xa, xi, TWO52, label, res, mask;
42104 TWO52 = ix86_gen_TWO52 (mode);
42106 /* Temporary for holding the result, initialized to the input
42107 operand to ease control flow. */
42108 res = gen_reg_rtx (mode);
42109 emit_move_insn (res, operand1);
42111 /* xa = abs (operand1) */
42112 xa = ix86_expand_sse_fabs (res, &mask);
42114 /* if (!isless (xa, TWO52)) goto label; */
42115 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42117 /* x = (double)(long)x */
42118 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42119 expand_fix (xi, res, 0);
42120 expand_float (res, xi, 0);
42122 if (HONOR_SIGNED_ZEROS (mode))
42123 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42125 emit_label (label);
42126 LABEL_NUSES (label) = 1;
42128 emit_move_insn (operand0, res);
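/* Illustrative note (not part of the expander): the (double)(long)x step
   relies on cvttsd2si / cvttsd2siq truncating toward zero, and is only
   reached when |x| < 2^52; larger doubles are already integral (and far
   larger ones would overflow the integer conversion).  A minimal C sketch
   of the same idea for DFmode (helper name is illustrative):

     static double
     trunc_via_int64 (double x)
     {
       const double two52 = 4503599627370496.0;
       if (!(x < two52 && x > -two52))
         return x;
       return (double) (long long) x;
     }
*/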
42131 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42132 into OPERAND0. */
42133 void
42134 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42136 enum machine_mode mode = GET_MODE (operand0);
42137 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42139 /* C code for SSE variant we expand below.
42140 double xa = fabs (x), x2;
42141 if (!isless (xa, TWO52))
42142 return x;
42143 xa2 = xa + TWO52 - TWO52;
42144 Compensate:
42145 if (xa2 > xa)
42146 xa2 -= 1.0;
42147 x2 = copysign (xa2, x);
42148 return x2;
42151 TWO52 = ix86_gen_TWO52 (mode);
42153 /* Temporary for holding the result, initialized to the input
42154 operand to ease control flow. */
42155 res = gen_reg_rtx (mode);
42156 emit_move_insn (res, operand1);
42158 /* xa = abs (operand1) */
42159 xa = ix86_expand_sse_fabs (res, &smask);
42161 /* if (!isless (xa, TWO52)) goto label; */
42162 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42164 /* res = xa + TWO52 - TWO52; */
42165 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42166 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42167 emit_move_insn (res, tmp);
42169 /* generate 1.0 */
42170 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42172 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42173 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42174 emit_insn (gen_rtx_SET (VOIDmode, mask,
42175 gen_rtx_AND (mode, mask, one)));
42176 tmp = expand_simple_binop (mode, MINUS,
42177 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42178 emit_move_insn (res, tmp);
42180 /* res = copysign (res, operand1) */
42181 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42183 emit_label (label);
42184 LABEL_NUSES (label) = 1;
42186 emit_move_insn (operand0, res);
42189 /* Expand SSE sequence for computing round from OPERAND1 storing
42190 into OPERAND0. */
42191 void
42192 ix86_expand_round (rtx operand0, rtx operand1)
42194 /* C code for the stuff we're doing below:
42195 double xa = fabs (x);
42196 if (!isless (xa, TWO52))
42197 return x;
42198 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42199 return copysign (xa, x);
42201 enum machine_mode mode = GET_MODE (operand0);
42202 rtx res, TWO52, xa, label, xi, half, mask;
42203 const struct real_format *fmt;
42204 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42206 /* Temporary for holding the result, initialized to the input
42207 operand to ease control flow. */
42208 res = gen_reg_rtx (mode);
42209 emit_move_insn (res, operand1);
42211 TWO52 = ix86_gen_TWO52 (mode);
42212 xa = ix86_expand_sse_fabs (res, &mask);
42213 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42215 /* load nextafter (0.5, 0.0) */
42216 fmt = REAL_MODE_FORMAT (mode);
42217 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42218 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42220 /* xa = xa + 0.5 */
42221 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42222 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42224 /* xa = (double)(int64_t)xa */
42225 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42226 expand_fix (xi, xa, 0);
42227 expand_float (xa, xi, 0);
42229 /* res = copysign (xa, operand1) */
42230 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42232 emit_label (label);
42233 LABEL_NUSES (label) = 1;
42235 emit_move_insn (operand0, res);
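/* Illustrative note (not part of the expander): the constant added above is
   pred_half = nextafter (0.5, 0.0) = 0.5 - 2^-54 for DFmode, not 0.5 itself.
   With a plain 0.5 the largest double below one half would misround:

     x = 0.49999999999999994       that is 0.5 - 2^-54
     x + 0.5        -> 1.0         1 - 2^-54 is a halfway case and
                                   nearest-even picks 1.0
     trunc (1.0)    -> 1.0         wrong, round (x) is 0.0

     x + pred_half  -> 1 - 2^-53   exactly representable
     trunc of that  -> 0.0         correct  */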
42238 /* Expand SSE sequence for computing round
42239 from OP1 storing into OP0 using sse4 round insn. */
42240 void
42241 ix86_expand_round_sse4 (rtx op0, rtx op1)
42243 enum machine_mode mode = GET_MODE (op0);
42244 rtx e1, e2, res, half;
42245 const struct real_format *fmt;
42246 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42247 rtx (*gen_copysign) (rtx, rtx, rtx);
42248 rtx (*gen_round) (rtx, rtx, rtx);
42250 switch (mode)
42252 case SFmode:
42253 gen_copysign = gen_copysignsf3;
42254 gen_round = gen_sse4_1_roundsf2;
42255 break;
42256 case DFmode:
42257 gen_copysign = gen_copysigndf3;
42258 gen_round = gen_sse4_1_rounddf2;
42259 break;
42260 default:
42261 gcc_unreachable ();
42264 /* round (a) = trunc (a + copysign (0.5, a)) */
42266 /* load nextafter (0.5, 0.0) */
42267 fmt = REAL_MODE_FORMAT (mode);
42268 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42269 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42270 half = const_double_from_real_value (pred_half, mode);
42272 /* e1 = copysign (0.5, op1) */
42273 e1 = gen_reg_rtx (mode);
42274 emit_insn (gen_copysign (e1, half, op1));
42276 /* e2 = op1 + e1 */
42277 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42279 /* res = trunc (e2) */
42280 res = gen_reg_rtx (mode);
42281 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42283 emit_move_insn (op0, res);
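/* Illustrative note (not part of the expander): the sequence above is the
   scalar identity round (a) = trunc (a + copysign (0.5, a)), with 0.5
   replaced by its predecessor for the halfway-case reason described for
   ix86_expand_round.  A minimal C sketch (helper name is illustrative):

     #include <math.h>

     static double
     round_by_trunc (double a)
     {
       double half = nextafter (0.5, 0.0);
       return trunc (a + copysign (half, a));
     }
*/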
42287 /* Table of valid machine attributes. */
42288 static const struct attribute_spec ix86_attribute_table[] =
42290 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42291 affects_type_identity } */
42292 /* Stdcall attribute says callee is responsible for popping arguments
42293 if they are not variable. */
42294 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42295 true },
42296 /* Fastcall attribute says callee is responsible for popping arguments
42297 if they are not variable. */
42298 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42299 true },
42300 /* Thiscall attribute says callee is responsible for popping arguments
42301 if they are not variable. */
42302 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42303 true },
42304 /* Cdecl attribute says the callee is a normal C declaration */
42305 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42306 true },
42307 /* Regparm attribute specifies how many integer arguments are to be
42308 passed in registers. */
42309 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42310 true },
42311 /* Sseregparm attribute says we are using x86_64 calling conventions
42312 for FP arguments. */
42313 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42314 true },
42315 /* The transactional memory builtins are implicitly regparm or fastcall
42316 depending on the ABI. Override the generic do-nothing attribute that
42317 these builtins were declared with. */
42318 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42319 true },
42320 /* force_align_arg_pointer says this function realigns the stack at entry. */
42321 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42322 false, true, true, ix86_handle_cconv_attribute, false },
42323 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42324 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42325 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42326 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42327 false },
42328 #endif
42329 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42330 false },
42331 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42332 false },
42333 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42334 SUBTARGET_ATTRIBUTE_TABLE,
42335 #endif
42336 /* ms_abi and sysv_abi calling convention function attributes. */
42337 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42338 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42339 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42340 false },
42341 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42342 ix86_handle_callee_pop_aggregate_return, true },
42343 /* End element. */
42344 { NULL, 0, 0, false, false, false, NULL, false }
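/* Illustrative note (not part of the table): these attributes are written by
   users on declarations, for example:

     int  __attribute__ ((regparm (3)))  f3 (int a, int b, int c);
     void __attribute__ ((fastcall))     cb (int handle);      (ia32 only)
     long __attribute__ ((ms_abi))       efi_call (void *image, void *tab);
     struct __attribute__ ((ms_struct))  msvc_layout { char c; long long x; };

   The handler functions named in the table validate the arguments (the table
   requires exactly one argument for regparm, for instance) and diagnose
   attribute combinations that do not make sense together.  */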
42347 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42348 static int
42349 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42350 tree vectype,
42351 int misalign ATTRIBUTE_UNUSED)
42353 unsigned elements;
42355 switch (type_of_cost)
42357 case scalar_stmt:
42358 return ix86_cost->scalar_stmt_cost;
42360 case scalar_load:
42361 return ix86_cost->scalar_load_cost;
42363 case scalar_store:
42364 return ix86_cost->scalar_store_cost;
42366 case vector_stmt:
42367 return ix86_cost->vec_stmt_cost;
42369 case vector_load:
42370 return ix86_cost->vec_align_load_cost;
42372 case vector_store:
42373 return ix86_cost->vec_store_cost;
42375 case vec_to_scalar:
42376 return ix86_cost->vec_to_scalar_cost;
42378 case scalar_to_vec:
42379 return ix86_cost->scalar_to_vec_cost;
42381 case unaligned_load:
42382 case unaligned_store:
42383 return ix86_cost->vec_unalign_load_cost;
42385 case cond_branch_taken:
42386 return ix86_cost->cond_taken_branch_cost;
42388 case cond_branch_not_taken:
42389 return ix86_cost->cond_not_taken_branch_cost;
42391 case vec_perm:
42392 case vec_promote_demote:
42393 return ix86_cost->vec_stmt_cost;
42395 case vec_construct:
42396 elements = TYPE_VECTOR_SUBPARTS (vectype);
42397 return elements / 2 + 1;
42399 default:
42400 gcc_unreachable ();
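/* Illustrative note: unlike the other entries, vec_construct is costed from
   the element count rather than a cost-table field; e.g. building a V8SImode
   vector from 8 scalars is costed as 8 / 2 + 1 = 5, and a V2DImode
   construction as 2 / 2 + 1 = 2.  */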
42404 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42405 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42406 insn every time. */
42408 static GTY(()) rtx vselect_insn;
42410 /* Initialize vselect_insn. */
42412 static void
42413 init_vselect_insn (void)
42415 unsigned i;
42416 rtx x;
42418 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42419 for (i = 0; i < MAX_VECT_LEN; ++i)
42420 XVECEXP (x, 0, i) = const0_rtx;
42421 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42422 const0_rtx), x);
42423 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42424 start_sequence ();
42425 vselect_insn = emit_insn (x);
42426 end_sequence ();
42429 /* Construct (set target (vec_select op0 (parallel perm))) and
42430 return true if that's a valid instruction in the active ISA. */
42432 static bool
42433 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42434 unsigned nelt, bool testing_p)
42436 unsigned int i;
42437 rtx x, save_vconcat;
42438 int icode;
42440 if (vselect_insn == NULL_RTX)
42441 init_vselect_insn ();
42443 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42444 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42445 for (i = 0; i < nelt; ++i)
42446 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42447 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42448 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42449 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42450 SET_DEST (PATTERN (vselect_insn)) = target;
42451 icode = recog_memoized (vselect_insn);
42453 if (icode >= 0 && !testing_p)
42454 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42456 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42457 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42458 INSN_CODE (vselect_insn) = -1;
42460 return icode >= 0;
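/* Illustrative note (not part of the expander): for a V4SImode reversal,
   perm = { 3, 2, 1, 0 }, the pattern handed to recog_memoized is

     (set (reg:V4SI target)
          (vec_select:V4SI (reg:V4SI op0)
                           (parallel [(const_int 3) (const_int 2)
                                      (const_int 1) (const_int 0)])))

   which on SSE2 can be matched as a pshufd with immediate 0x1b.  */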
42463 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42465 static bool
42466 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42467 const unsigned char *perm, unsigned nelt,
42468 bool testing_p)
42470 enum machine_mode v2mode;
42471 rtx x;
42472 bool ok;
42474 if (vselect_insn == NULL_RTX)
42475 init_vselect_insn ();
42477 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42478 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42479 PUT_MODE (x, v2mode);
42480 XEXP (x, 0) = op0;
42481 XEXP (x, 1) = op1;
42482 ok = expand_vselect (target, x, perm, nelt, testing_p);
42483 XEXP (x, 0) = const0_rtx;
42484 XEXP (x, 1) = const0_rtx;
42485 return ok;
42488 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42489 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42491 static bool
42492 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42494 enum machine_mode vmode = d->vmode;
42495 unsigned i, mask, nelt = d->nelt;
42496 rtx target, op0, op1, x;
42497 rtx rperm[32], vperm;
42499 if (d->one_operand_p)
42500 return false;
42501 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42503 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42505 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42507 else
42508 return false;
42510 /* This is a blend, not a permute. Elements must stay in their
42511 respective lanes. */
42512 for (i = 0; i < nelt; ++i)
42514 unsigned e = d->perm[i];
42515 if (!(e == i || e == i + nelt))
42516 return false;
42519 if (d->testing_p)
42520 return true;
42522 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42523 decision should be extracted elsewhere, so that we only try that
42524 sequence once all budget==3 options have been tried. */
42525 target = d->target;
42526 op0 = d->op0;
42527 op1 = d->op1;
42528 mask = 0;
42530 switch (vmode)
42532 case V4DFmode:
42533 case V8SFmode:
42534 case V2DFmode:
42535 case V4SFmode:
42536 case V8HImode:
42537 case V8SImode:
42538 for (i = 0; i < nelt; ++i)
42539 mask |= (d->perm[i] >= nelt) << i;
42540 break;
42542 case V2DImode:
42543 for (i = 0; i < 2; ++i)
42544 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42545 vmode = V8HImode;
42546 goto do_subreg;
42548 case V4SImode:
42549 for (i = 0; i < 4; ++i)
42550 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42551 vmode = V8HImode;
42552 goto do_subreg;
42554 case V16QImode:
42555 /* See if bytes move in pairs so we can use pblendw with
42556 an immediate argument, rather than pblendvb with a vector
42557 argument. */
42558 for (i = 0; i < 16; i += 2)
42559 if (d->perm[i] + 1 != d->perm[i + 1])
42561 use_pblendvb:
42562 for (i = 0; i < nelt; ++i)
42563 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42565 finish_pblendvb:
42566 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42567 vperm = force_reg (vmode, vperm);
42569 if (GET_MODE_SIZE (vmode) == 16)
42570 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42571 else
42572 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42573 if (target != d->target)
42574 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42575 return true;
42578 for (i = 0; i < 8; ++i)
42579 mask |= (d->perm[i * 2] >= 16) << i;
42580 vmode = V8HImode;
42581 /* FALLTHRU */
42583 do_subreg:
42584 target = gen_reg_rtx (vmode);
42585 op0 = gen_lowpart (vmode, op0);
42586 op1 = gen_lowpart (vmode, op1);
42587 break;
42589 case V32QImode:
42590 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42591 for (i = 0; i < 32; i += 2)
42592 if (d->perm[i] + 1 != d->perm[i + 1])
42593 goto use_pblendvb;
42594 /* See if bytes move in quadruplets. If yes, vpblendd
42595 with immediate can be used. */
42596 for (i = 0; i < 32; i += 4)
42597 if (d->perm[i] + 2 != d->perm[i + 2])
42598 break;
42599 if (i < 32)
42601 /* See if bytes move the same in both lanes. If yes,
42602 vpblendw with immediate can be used. */
42603 for (i = 0; i < 16; i += 2)
42604 if (d->perm[i] + 16 != d->perm[i + 16])
42605 goto use_pblendvb;
42607 /* Use vpblendw. */
42608 for (i = 0; i < 16; ++i)
42609 mask |= (d->perm[i * 2] >= 32) << i;
42610 vmode = V16HImode;
42611 goto do_subreg;
42614 /* Use vpblendd. */
42615 for (i = 0; i < 8; ++i)
42616 mask |= (d->perm[i * 4] >= 32) << i;
42617 vmode = V8SImode;
42618 goto do_subreg;
42620 case V16HImode:
42621 /* See if words move in pairs. If yes, vpblendd can be used. */
42622 for (i = 0; i < 16; i += 2)
42623 if (d->perm[i] + 1 != d->perm[i + 1])
42624 break;
42625 if (i < 16)
42627 /* See if words move the same in both lanes. If not,
42628 vpblendvb must be used. */
42629 for (i = 0; i < 8; i++)
42630 if (d->perm[i] + 8 != d->perm[i + 8])
42632 /* Use vpblendvb. */
42633 for (i = 0; i < 32; ++i)
42634 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42636 vmode = V32QImode;
42637 nelt = 32;
42638 target = gen_reg_rtx (vmode);
42639 op0 = gen_lowpart (vmode, op0);
42640 op1 = gen_lowpart (vmode, op1);
42641 goto finish_pblendvb;
42644 /* Use vpblendw. */
42645 for (i = 0; i < 16; ++i)
42646 mask |= (d->perm[i] >= 16) << i;
42647 break;
42650 /* Use vpblendd. */
42651 for (i = 0; i < 8; ++i)
42652 mask |= (d->perm[i * 2] >= 16) << i;
42653 vmode = V8SImode;
42654 goto do_subreg;
42656 case V4DImode:
42657 /* Use vpblendd. */
42658 for (i = 0; i < 4; ++i)
42659 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42660 vmode = V8SImode;
42661 goto do_subreg;
42663 default:
42664 gcc_unreachable ();
42667 /* This matches five different patterns with the different modes. */
42668 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42669 x = gen_rtx_SET (VOIDmode, target, x);
42670 emit_insn (x);
42671 if (target != d->target)
42672 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42674 return true;
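/* Illustrative note (not part of the expander): MASK gets bit I set when
   result element I is taken from op1.  E.g. the V8HImode blend
   perm = { 0, 9, 2, 11, 4, 13, 6, 15 } yields mask = 0xaa, so the vec_merge
   emitted above selects op1 for the odd elements and op0 for the even ones,
   matching pblendw with immediate 0xaa.  */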
42677 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42678 in terms of the variable form of vpermilps.
42680 Note that we will have already failed the immediate input vpermilps,
42681 which requires that the high and low part shuffle be identical; the
42682 variable form doesn't require that. */
42684 static bool
42685 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42687 rtx rperm[8], vperm;
42688 unsigned i;
42690 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42691 return false;
42693 /* We can only permute within the 128-bit lane. */
42694 for (i = 0; i < 8; ++i)
42696 unsigned e = d->perm[i];
42697 if (i < 4 ? e >= 4 : e < 4)
42698 return false;
42701 if (d->testing_p)
42702 return true;
42704 for (i = 0; i < 8; ++i)
42706 unsigned e = d->perm[i];
42708 /* Within each 128-bit lane, the elements of op0 are numbered
42709 from 0 and the elements of op1 are numbered from 4. */
42710 if (e >= 8 + 4)
42711 e -= 8;
42712 else if (e >= 4)
42713 e -= 4;
42715 rperm[i] = GEN_INT (e);
42718 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42719 vperm = force_reg (V8SImode, vperm);
42720 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42722 return true;
42725 /* Return true if permutation D can be performed as VMODE permutation
42726 instead. */
42728 static bool
42729 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42731 unsigned int i, j, chunk;
42733 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42734 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42735 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42736 return false;
42738 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42739 return true;
42741 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42742 for (i = 0; i < d->nelt; i += chunk)
42743 if (d->perm[i] & (chunk - 1))
42744 return false;
42745 else
42746 for (j = 1; j < chunk; ++j)
42747 if (d->perm[i] + j != d->perm[i + j])
42748 return false;
42750 return true;
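/* Illustrative note (not part of the function): a permutation that only
   moves aligned, consecutive groups of elements can be re-expressed in a
   wider integer mode.  E.g. the V16QImode permutation
     { 4 5 6 7  0 1 2 3  12 13 14 15  8 9 10 11 }
   moves whole 4-byte chunks (chunk = 16 / 4 here), so it is also valid as
   the V4SImode permutation { 1, 0, 3, 2 }.  */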
42753 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42754 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42756 static bool
42757 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42759 unsigned i, nelt, eltsz, mask;
42760 unsigned char perm[32];
42761 enum machine_mode vmode = V16QImode;
42762 rtx rperm[32], vperm, target, op0, op1;
42764 nelt = d->nelt;
42766 if (!d->one_operand_p)
42768 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42770 if (TARGET_AVX2
42771 && valid_perm_using_mode_p (V2TImode, d))
42773 if (d->testing_p)
42774 return true;
42776 /* Use vperm2i128 insn. The pattern uses
42777 V4DImode instead of V2TImode. */
42778 target = d->target;
42779 if (d->vmode != V4DImode)
42780 target = gen_reg_rtx (V4DImode);
42781 op0 = gen_lowpart (V4DImode, d->op0);
42782 op1 = gen_lowpart (V4DImode, d->op1);
42783 rperm[0]
42784 = GEN_INT ((d->perm[0] / (nelt / 2))
42785 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
42786 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42787 if (target != d->target)
42788 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42789 return true;
42791 return false;
42794 else
42796 if (GET_MODE_SIZE (d->vmode) == 16)
42798 if (!TARGET_SSSE3)
42799 return false;
42801 else if (GET_MODE_SIZE (d->vmode) == 32)
42803 if (!TARGET_AVX2)
42804 return false;
42806 /* V4DImode should already be handled through
42807 expand_vselect by the vpermq instruction. */
42808 gcc_assert (d->vmode != V4DImode);
42810 vmode = V32QImode;
42811 if (d->vmode == V8SImode
42812 || d->vmode == V16HImode
42813 || d->vmode == V32QImode)
42815 /* First see if vpermq can be used for
42816 V8SImode/V16HImode/V32QImode. */
42817 if (valid_perm_using_mode_p (V4DImode, d))
42819 for (i = 0; i < 4; i++)
42820 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42821 if (d->testing_p)
42822 return true;
42823 target = gen_reg_rtx (V4DImode);
42824 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42825 perm, 4, false))
42827 emit_move_insn (d->target,
42828 gen_lowpart (d->vmode, target));
42829 return true;
42831 return false;
42834 /* Next see if vpermd can be used. */
42835 if (valid_perm_using_mode_p (V8SImode, d))
42836 vmode = V8SImode;
42838 /* Or if vpermps can be used. */
42839 else if (d->vmode == V8SFmode)
42840 vmode = V8SImode;
42842 if (vmode == V32QImode)
42844 /* vpshufb only works intra-lane; it is not
42845 possible to shuffle bytes between the lanes. */
42846 for (i = 0; i < nelt; ++i)
42847 if ((d->perm[i] ^ i) & (nelt / 2))
42848 return false;
42851 else
42852 return false;
42855 if (d->testing_p)
42856 return true;
42858 if (vmode == V8SImode)
42859 for (i = 0; i < 8; ++i)
42860 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42861 else
42863 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42864 if (!d->one_operand_p)
42865 mask = 2 * nelt - 1;
42866 else if (vmode == V16QImode)
42867 mask = nelt - 1;
42868 else
42869 mask = nelt / 2 - 1;
42871 for (i = 0; i < nelt; ++i)
42873 unsigned j, e = d->perm[i] & mask;
42874 for (j = 0; j < eltsz; ++j)
42875 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42879 vperm = gen_rtx_CONST_VECTOR (vmode,
42880 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42881 vperm = force_reg (vmode, vperm);
42883 target = d->target;
42884 if (d->vmode != vmode)
42885 target = gen_reg_rtx (vmode);
42886 op0 = gen_lowpart (vmode, d->op0);
42887 if (d->one_operand_p)
42889 if (vmode == V16QImode)
42890 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42891 else if (vmode == V32QImode)
42892 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42893 else if (vmode == V8SFmode)
42894 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42895 else
42896 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42898 else
42900 op1 = gen_lowpart (vmode, d->op1);
42901 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42903 if (target != d->target)
42904 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42906 return true;
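/* Illustrative note (not part of the expander): each control byte built
   above indexes one source byte with its low bits; pshufb/vpshufb can only
   address bytes of the same 128-bit lane, and a control byte with bit 7 set
   (the -128 constants used in the multi-insn variants below) forces the
   result byte to zero.  The XOP vpperm form indexes all 32 bytes of the two
   concatenated operands, which is why MASK is 2 * nelt - 1 in the
   two-operand case.  */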
42909 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42910 in a single instruction. */
42912 static bool
42913 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42915 unsigned i, nelt = d->nelt;
42916 unsigned char perm2[MAX_VECT_LEN];
42918 /* Check plain VEC_SELECT first, because AVX has instructions that could
42919 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42920 input where SEL+CONCAT may not. */
42921 if (d->one_operand_p)
42923 int mask = nelt - 1;
42924 bool identity_perm = true;
42925 bool broadcast_perm = true;
42927 for (i = 0; i < nelt; i++)
42929 perm2[i] = d->perm[i] & mask;
42930 if (perm2[i] != i)
42931 identity_perm = false;
42932 if (perm2[i])
42933 broadcast_perm = false;
42936 if (identity_perm)
42938 if (!d->testing_p)
42939 emit_move_insn (d->target, d->op0);
42940 return true;
42942 else if (broadcast_perm && TARGET_AVX2)
42944 /* Use vpbroadcast{b,w,d}. */
42945 rtx (*gen) (rtx, rtx) = NULL;
42946 switch (d->vmode)
42948 case V32QImode:
42949 gen = gen_avx2_pbroadcastv32qi_1;
42950 break;
42951 case V16HImode:
42952 gen = gen_avx2_pbroadcastv16hi_1;
42953 break;
42954 case V8SImode:
42955 gen = gen_avx2_pbroadcastv8si_1;
42956 break;
42957 case V16QImode:
42958 gen = gen_avx2_pbroadcastv16qi;
42959 break;
42960 case V8HImode:
42961 gen = gen_avx2_pbroadcastv8hi;
42962 break;
42963 case V8SFmode:
42964 gen = gen_avx2_vec_dupv8sf_1;
42965 break;
42966 /* For other modes prefer other shuffles this function creates. */
42967 default: break;
42969 if (gen != NULL)
42971 if (!d->testing_p)
42972 emit_insn (gen (d->target, d->op0));
42973 return true;
42977 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
42978 return true;
42980 /* There are plenty of patterns in sse.md that are written for
42981 SEL+CONCAT and are not replicated for a single op. Perhaps
42982 that should be changed, to avoid the nastiness here. */
42984 /* Recognize interleave style patterns, which means incrementing
42985 every other permutation operand. */
42986 for (i = 0; i < nelt; i += 2)
42988 perm2[i] = d->perm[i] & mask;
42989 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
42991 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
42992 d->testing_p))
42993 return true;
42995 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
42996 if (nelt >= 4)
42998 for (i = 0; i < nelt; i += 4)
43000 perm2[i + 0] = d->perm[i + 0] & mask;
43001 perm2[i + 1] = d->perm[i + 1] & mask;
43002 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43003 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43006 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43007 d->testing_p))
43008 return true;
43012 /* Finally, try the fully general two operand permute. */
43013 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43014 d->testing_p))
43015 return true;
43017 /* Recognize interleave style patterns with reversed operands. */
43018 if (!d->one_operand_p)
43020 for (i = 0; i < nelt; ++i)
43022 unsigned e = d->perm[i];
43023 if (e >= nelt)
43024 e -= nelt;
43025 else
43026 e += nelt;
43027 perm2[i] = e;
43030 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43031 d->testing_p))
43032 return true;
43035 /* Try the SSE4.1 blend variable merge instructions. */
43036 if (expand_vec_perm_blend (d))
43037 return true;
43039 /* Try one of the AVX vpermil variable permutations. */
43040 if (expand_vec_perm_vpermil (d))
43041 return true;
43043 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43044 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43045 if (expand_vec_perm_pshufb (d))
43046 return true;
43048 /* Try the AVX512F vpermi2 instructions. */
43049 rtx vec[64];
43050 enum machine_mode mode = d->vmode;
43051 if (mode == V8DFmode)
43052 mode = V8DImode;
43053 else if (mode == V16SFmode)
43054 mode = V16SImode;
43055 for (i = 0; i < nelt; ++i)
43056 vec[i] = GEN_INT (d->perm[i]);
43057 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43058 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43059 return true;
43061 return false;
43064 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43065 in terms of a pair of pshuflw + pshufhw instructions. */
43067 static bool
43068 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43070 unsigned char perm2[MAX_VECT_LEN];
43071 unsigned i;
43072 bool ok;
43074 if (d->vmode != V8HImode || !d->one_operand_p)
43075 return false;
43077 /* The two permutations only operate in 64-bit lanes. */
43078 for (i = 0; i < 4; ++i)
43079 if (d->perm[i] >= 4)
43080 return false;
43081 for (i = 4; i < 8; ++i)
43082 if (d->perm[i] < 4)
43083 return false;
43085 if (d->testing_p)
43086 return true;
43088 /* Emit the pshuflw. */
43089 memcpy (perm2, d->perm, 4);
43090 for (i = 4; i < 8; ++i)
43091 perm2[i] = i;
43092 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43093 gcc_assert (ok);
43095 /* Emit the pshufhw. */
43096 memcpy (perm2 + 4, d->perm + 4, 4);
43097 for (i = 0; i < 4; ++i)
43098 perm2[i] = i;
43099 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43100 gcc_assert (ok);
43102 return true;
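/* Illustrative note (not part of the expander): e.g. the V8HImode
   permutation { 2, 1, 0, 3, 5, 7, 6, 4 } never crosses the 64-bit halves,
   so it is emitted as pshuflw with { 2, 1, 0, 3, 4, 5, 6, 7 } followed by
   pshufhw with { 0, 1, 2, 3, 5, 7, 6, 4 } applied to the intermediate
   result.  */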
43105 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43106 the permutation using the SSSE3 palignr instruction. This succeeds
43107 when all of the elements in PERM fit within one vector and we merely
43108 need to shift them down so that a single vector permutation has a
43109 chance to succeed. */
43111 static bool
43112 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43114 unsigned i, nelt = d->nelt;
43115 unsigned min, max;
43116 bool in_order, ok;
43117 rtx shift, target;
43118 struct expand_vec_perm_d dcopy;
43120 /* Even with AVX, palignr only operates on 128-bit vectors. */
43121 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43122 return false;
43124 min = nelt, max = 0;
43125 for (i = 0; i < nelt; ++i)
43127 unsigned e = d->perm[i];
43128 if (e < min)
43129 min = e;
43130 if (e > max)
43131 max = e;
43133 if (min == 0 || max - min >= nelt)
43134 return false;
43136 /* Given that we have SSSE3, we know we'll be able to implement the
43137 single operand permutation after the palignr with pshufb. */
43138 if (d->testing_p)
43139 return true;
43141 dcopy = *d;
43142 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43143 target = gen_reg_rtx (TImode);
43144 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43145 gen_lowpart (TImode, d->op0), shift));
43147 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43148 dcopy.one_operand_p = true;
43150 in_order = true;
43151 for (i = 0; i < nelt; ++i)
43153 unsigned e = dcopy.perm[i] - min;
43154 if (e != i)
43155 in_order = false;
43156 dcopy.perm[i] = e;
43159 /* Test for the degenerate case where the alignment by itself
43160 produces the desired permutation. */
43161 if (in_order)
43163 emit_move_insn (d->target, dcopy.op0);
43164 return true;
43167 ok = expand_vec_perm_1 (&dcopy);
43168 gcc_assert (ok);
43170 return ok;
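/* Illustrative note (not part of the expander): e.g. a V16QImode two-operand
   permutation whose indices all fall in 5 .. 20 fits in one 16-byte window.
   The palignr above, shifting by MIN bytes (5 here), forms
   { op0[5..15], op1[0..4] } in a single register, and the leftover
   rearrangement is then a one-operand permutation (indices 0 .. 15 after
   subtracting MIN) that expand_vec_perm_1 can handle, e.g. with pshufb.  */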
43173 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43175 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43176 a two vector permutation into a single vector permutation by using
43177 an interleave operation to merge the vectors. */
43179 static bool
43180 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43182 struct expand_vec_perm_d dremap, dfinal;
43183 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43184 unsigned HOST_WIDE_INT contents;
43185 unsigned char remap[2 * MAX_VECT_LEN];
43186 rtx seq;
43187 bool ok, same_halves = false;
43189 if (GET_MODE_SIZE (d->vmode) == 16)
43191 if (d->one_operand_p)
43192 return false;
43194 else if (GET_MODE_SIZE (d->vmode) == 32)
43196 if (!TARGET_AVX)
43197 return false;
43198 /* For 32-byte modes allow even d->one_operand_p.
43199 The lack of cross-lane shuffling in some instructions
43200 might prevent a single insn shuffle. */
43201 dfinal = *d;
43202 dfinal.testing_p = true;
43203 /* If expand_vec_perm_interleave3 can expand this into
43204 a 3 insn sequence, give up and let it be expanded as
43205 3 insn sequence. While that is one insn longer,
43206 it doesn't need a memory operand and in the common
43207 case that both interleave low and high permutations
43208 with the same operands are adjacent needs 4 insns
43209 for both after CSE. */
43210 if (expand_vec_perm_interleave3 (&dfinal))
43211 return false;
43213 else
43214 return false;
43216 /* Examine from whence the elements come. */
43217 contents = 0;
43218 for (i = 0; i < nelt; ++i)
43219 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43221 memset (remap, 0xff, sizeof (remap));
43222 dremap = *d;
43224 if (GET_MODE_SIZE (d->vmode) == 16)
43226 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43228 /* Split the two input vectors into 4 halves. */
43229 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43230 h2 = h1 << nelt2;
43231 h3 = h2 << nelt2;
43232 h4 = h3 << nelt2;
43234 /* If the elements are all from the low halves, use interleave low, and
43235 similarly interleave high for the high halves. If the elements are from
43236 mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43237 if ((contents & (h1 | h3)) == contents)
43239 /* punpckl* */
43240 for (i = 0; i < nelt2; ++i)
43242 remap[i] = i * 2;
43243 remap[i + nelt] = i * 2 + 1;
43244 dremap.perm[i * 2] = i;
43245 dremap.perm[i * 2 + 1] = i + nelt;
43247 if (!TARGET_SSE2 && d->vmode == V4SImode)
43248 dremap.vmode = V4SFmode;
43250 else if ((contents & (h2 | h4)) == contents)
43252 /* punpckh* */
43253 for (i = 0; i < nelt2; ++i)
43255 remap[i + nelt2] = i * 2;
43256 remap[i + nelt + nelt2] = i * 2 + 1;
43257 dremap.perm[i * 2] = i + nelt2;
43258 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43260 if (!TARGET_SSE2 && d->vmode == V4SImode)
43261 dremap.vmode = V4SFmode;
43263 else if ((contents & (h1 | h4)) == contents)
43265 /* shufps */
43266 for (i = 0; i < nelt2; ++i)
43268 remap[i] = i;
43269 remap[i + nelt + nelt2] = i + nelt2;
43270 dremap.perm[i] = i;
43271 dremap.perm[i + nelt2] = i + nelt + nelt2;
43273 if (nelt != 4)
43275 /* shufpd */
43276 dremap.vmode = V2DImode;
43277 dremap.nelt = 2;
43278 dremap.perm[0] = 0;
43279 dremap.perm[1] = 3;
43282 else if ((contents & (h2 | h3)) == contents)
43284 /* shufps */
43285 for (i = 0; i < nelt2; ++i)
43287 remap[i + nelt2] = i;
43288 remap[i + nelt] = i + nelt2;
43289 dremap.perm[i] = i + nelt2;
43290 dremap.perm[i + nelt2] = i + nelt;
43292 if (nelt != 4)
43294 /* shufpd */
43295 dremap.vmode = V2DImode;
43296 dremap.nelt = 2;
43297 dremap.perm[0] = 1;
43298 dremap.perm[1] = 2;
43301 else
43302 return false;
43304 else
43306 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43307 unsigned HOST_WIDE_INT q[8];
43308 unsigned int nonzero_halves[4];
43310 /* Split the two input vectors into 8 quarters. */
43311 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43312 for (i = 1; i < 8; ++i)
43313 q[i] = q[0] << (nelt4 * i);
43314 for (i = 0; i < 4; ++i)
43315 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43317 nonzero_halves[nzcnt] = i;
43318 ++nzcnt;
43321 if (nzcnt == 1)
43323 gcc_assert (d->one_operand_p);
43324 nonzero_halves[1] = nonzero_halves[0];
43325 same_halves = true;
43327 else if (d->one_operand_p)
43329 gcc_assert (nonzero_halves[0] == 0);
43330 gcc_assert (nonzero_halves[1] == 1);
43333 if (nzcnt <= 2)
43335 if (d->perm[0] / nelt2 == nonzero_halves[1])
43337 /* Attempt to increase the likelihood that dfinal
43338 shuffle will be intra-lane. */
43339 char tmph = nonzero_halves[0];
43340 nonzero_halves[0] = nonzero_halves[1];
43341 nonzero_halves[1] = tmph;
43344 /* vperm2f128 or vperm2i128. */
43345 for (i = 0; i < nelt2; ++i)
43347 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43348 remap[i + nonzero_halves[0] * nelt2] = i;
43349 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43350 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43353 if (d->vmode != V8SFmode
43354 && d->vmode != V4DFmode
43355 && d->vmode != V8SImode)
43357 dremap.vmode = V8SImode;
43358 dremap.nelt = 8;
43359 for (i = 0; i < 4; ++i)
43361 dremap.perm[i] = i + nonzero_halves[0] * 4;
43362 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43366 else if (d->one_operand_p)
43367 return false;
43368 else if (TARGET_AVX2
43369 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43371 /* vpunpckl* */
43372 for (i = 0; i < nelt4; ++i)
43374 remap[i] = i * 2;
43375 remap[i + nelt] = i * 2 + 1;
43376 remap[i + nelt2] = i * 2 + nelt2;
43377 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43378 dremap.perm[i * 2] = i;
43379 dremap.perm[i * 2 + 1] = i + nelt;
43380 dremap.perm[i * 2 + nelt2] = i + nelt2;
43381 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43384 else if (TARGET_AVX2
43385 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43387 /* vpunpckh* */
43388 for (i = 0; i < nelt4; ++i)
43390 remap[i + nelt4] = i * 2;
43391 remap[i + nelt + nelt4] = i * 2 + 1;
43392 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43393 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43394 dremap.perm[i * 2] = i + nelt4;
43395 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43396 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43397 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43400 else
43401 return false;
43404 /* Use the remapping array set up above to move the elements from their
43405 swizzled locations into their final destinations. */
43406 dfinal = *d;
43407 for (i = 0; i < nelt; ++i)
43409 unsigned e = remap[d->perm[i]];
43410 gcc_assert (e < nelt);
43411 /* If same_halves is true, both halves of the remapped vector are the
43412 same. Avoid cross-lane accesses if possible. */
43413 if (same_halves && i >= nelt2)
43415 gcc_assert (e < nelt2);
43416 dfinal.perm[i] = e + nelt2;
43418 else
43419 dfinal.perm[i] = e;
43421 if (!d->testing_p)
43423 dremap.target = gen_reg_rtx (dremap.vmode);
43424 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43426 dfinal.op1 = dfinal.op0;
43427 dfinal.one_operand_p = true;
43429 /* Test if the final remap can be done with a single insn. For V4SFmode or
43430 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43431 start_sequence ();
43432 ok = expand_vec_perm_1 (&dfinal);
43433 seq = get_insns ();
43434 end_sequence ();
43436 if (!ok)
43437 return false;
43439 if (d->testing_p)
43440 return true;
43442 if (dremap.vmode != dfinal.vmode)
43444 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43445 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43448 ok = expand_vec_perm_1 (&dremap);
43449 gcc_assert (ok);
43451 emit_insn (seq);
43452 return true;
43455 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43456 a single vector cross-lane permutation into vpermq followed
43457 by any of the single insn permutations. */
43459 static bool
43460 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43462 struct expand_vec_perm_d dremap, dfinal;
43463 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43464 unsigned contents[2];
43465 bool ok;
43467 if (!(TARGET_AVX2
43468 && (d->vmode == V32QImode || d->vmode == V16HImode)
43469 && d->one_operand_p))
43470 return false;
43472 contents[0] = 0;
43473 contents[1] = 0;
43474 for (i = 0; i < nelt2; ++i)
43476 contents[0] |= 1u << (d->perm[i] / nelt4);
43477 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43480 for (i = 0; i < 2; ++i)
43482 unsigned int cnt = 0;
43483 for (j = 0; j < 4; ++j)
43484 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43485 return false;
43488 if (d->testing_p)
43489 return true;
43491 dremap = *d;
43492 dremap.vmode = V4DImode;
43493 dremap.nelt = 4;
43494 dremap.target = gen_reg_rtx (V4DImode);
43495 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43496 dremap.op1 = dremap.op0;
43497 dremap.one_operand_p = true;
43498 for (i = 0; i < 2; ++i)
43500 unsigned int cnt = 0;
43501 for (j = 0; j < 4; ++j)
43502 if ((contents[i] & (1u << j)) != 0)
43503 dremap.perm[2 * i + cnt++] = j;
43504 for (; cnt < 2; ++cnt)
43505 dremap.perm[2 * i + cnt] = 0;
43508 dfinal = *d;
43509 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43510 dfinal.op1 = dfinal.op0;
43511 dfinal.one_operand_p = true;
43512 for (i = 0, j = 0; i < nelt; ++i)
43514 if (i == nelt2)
43515 j = 2;
43516 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43517 if ((d->perm[i] / nelt4) == dremap.perm[j])
43519 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43520 dfinal.perm[i] |= nelt4;
43521 else
43522 gcc_unreachable ();
43525 ok = expand_vec_perm_1 (&dremap);
43526 gcc_assert (ok);
43528 ok = expand_vec_perm_1 (&dfinal);
43529 gcc_assert (ok);
43531 return true;
43534 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43535 a vector permutation using two instructions: vperm2f128 (or
43536 vperm2i128) followed by any single in-lane permutation. */
43538 static bool
43539 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43541 struct expand_vec_perm_d dfirst, dsecond;
43542 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43543 bool ok;
43545 if (!TARGET_AVX
43546 || GET_MODE_SIZE (d->vmode) != 32
43547 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43548 return false;
43550 dsecond = *d;
43551 dsecond.one_operand_p = false;
43552 dsecond.testing_p = true;
43554 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43555 immediate. For perm < 16 the second permutation uses
43556 d->op0 as first operand, for perm >= 16 it uses d->op1
43557 as first operand. The second operand is the result of
43558 vperm2[fi]128. */
43559 for (perm = 0; perm < 32; perm++)
43561 /* Ignore permutations which do not move anything cross-lane. */
43562 if (perm < 16)
43564 /* The second shuffle for e.g. V4DFmode has
43565 0123 and ABCD operands.
43566 Ignore AB23, as 23 is already in the second lane
43567 of the first operand. */
43568 if ((perm & 0xc) == (1 << 2)) continue;
43569 /* And 01CD, as 01 is in the first lane of the first
43570 operand. */
43571 if ((perm & 3) == 0) continue;
43572 /* And 4567, as then the vperm2[fi]128 doesn't change
43573 anything on the original 4567 second operand. */
43574 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43576 else
43578 /* The second shuffle for e.g. V4DFmode has
43579 4567 and ABCD operands.
43580 Ignore AB67, as 67 is already in the second lane
43581 of the first operand. */
43582 if ((perm & 0xc) == (3 << 2)) continue;
43583 /* And 45CD, as 45 is in the first lane of the first
43584 operand. */
43585 if ((perm & 3) == 2) continue;
43586 /* And 0123, as then the vperm2[fi]128 doesn't change
43587 anything on the original 0123 first operand. */
43588 if ((perm & 0xf) == (1 << 2)) continue;
43591 for (i = 0; i < nelt; i++)
43593 j = d->perm[i] / nelt2;
43594 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43595 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43596 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43597 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43598 else
43599 break;
43602 if (i == nelt)
43604 start_sequence ();
43605 ok = expand_vec_perm_1 (&dsecond);
43606 end_sequence ();
43608 else
43609 ok = false;
43611 if (ok)
43613 if (d->testing_p)
43614 return true;
43616 /* Found a usable second shuffle. dfirst will be
43617 vperm2f128 on d->op0 and d->op1. */
43618 dsecond.testing_p = false;
43619 dfirst = *d;
43620 dfirst.target = gen_reg_rtx (d->vmode);
43621 for (i = 0; i < nelt; i++)
43622 dfirst.perm[i] = (i & (nelt2 - 1))
43623 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43625 ok = expand_vec_perm_1 (&dfirst);
43626 gcc_assert (ok);
43628 /* And dsecond is some single insn shuffle, taking
43629 d->op0 and result of vperm2f128 (if perm < 16) or
43630 d->op1 and result of vperm2f128 (otherwise). */
43631 dsecond.op1 = dfirst.target;
43632 if (perm >= 16)
43633 dsecond.op0 = dfirst.op1;
43635 ok = expand_vec_perm_1 (&dsecond);
43636 gcc_assert (ok);
43638 return true;
43641 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43642 if (d->one_operand_p)
43643 return false;
43646 return false;
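/* Illustrative note (not part of the expander): the vperm2[fi]128 immediate
   picks one 128-bit source lane per result lane: bits 0-1 select the lane
   for the low half and bits 4-5 the lane for the high half, with the four
   candidate lanes numbered 0 = low of op0, 1 = high of op0, 2 = low of op1,
   3 = high of op1.  That is why ((perm << 2) | perm) & 0x33, mentioned in
   the comment above, maps a packed 4-bit lane selection onto the immediate
   layout.  */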
43649 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43650 a two vector permutation using 2 intra-lane interleave insns
43651 and cross-lane shuffle for 32-byte vectors. */
43653 static bool
43654 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43656 unsigned i, nelt;
43657 rtx (*gen) (rtx, rtx, rtx);
43659 if (d->one_operand_p)
43660 return false;
43661 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43663 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43665 else
43666 return false;
43668 nelt = d->nelt;
43669 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43670 return false;
43671 for (i = 0; i < nelt; i += 2)
43672 if (d->perm[i] != d->perm[0] + i / 2
43673 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43674 return false;
43676 if (d->testing_p)
43677 return true;
43679 switch (d->vmode)
43681 case V32QImode:
43682 if (d->perm[0])
43683 gen = gen_vec_interleave_highv32qi;
43684 else
43685 gen = gen_vec_interleave_lowv32qi;
43686 break;
43687 case V16HImode:
43688 if (d->perm[0])
43689 gen = gen_vec_interleave_highv16hi;
43690 else
43691 gen = gen_vec_interleave_lowv16hi;
43692 break;
43693 case V8SImode:
43694 if (d->perm[0])
43695 gen = gen_vec_interleave_highv8si;
43696 else
43697 gen = gen_vec_interleave_lowv8si;
43698 break;
43699 case V4DImode:
43700 if (d->perm[0])
43701 gen = gen_vec_interleave_highv4di;
43702 else
43703 gen = gen_vec_interleave_lowv4di;
43704 break;
43705 case V8SFmode:
43706 if (d->perm[0])
43707 gen = gen_vec_interleave_highv8sf;
43708 else
43709 gen = gen_vec_interleave_lowv8sf;
43710 break;
43711 case V4DFmode:
43712 if (d->perm[0])
43713 gen = gen_vec_interleave_highv4df;
43714 else
43715 gen = gen_vec_interleave_lowv4df;
43716 break;
43717 default:
43718 gcc_unreachable ();
43721 emit_insn (gen (d->target, d->op0, d->op1));
43722 return true;
43725 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43726 a single vector permutation using a single intra-lane vector
43727 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43728 the non-swapped and swapped vectors together. */
43730 static bool
43731 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43733 struct expand_vec_perm_d dfirst, dsecond;
43734 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43735 rtx seq;
43736 bool ok;
43737 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43739 if (!TARGET_AVX
43740 || TARGET_AVX2
43741 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43742 || !d->one_operand_p)
43743 return false;
43745 dfirst = *d;
43746 for (i = 0; i < nelt; i++)
43747 dfirst.perm[i] = 0xff;
43748 for (i = 0, msk = 0; i < nelt; i++)
43750 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43751 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43752 return false;
43753 dfirst.perm[j] = d->perm[i];
43754 if (j != i)
43755 msk |= (1 << i);
43757 for (i = 0; i < nelt; i++)
43758 if (dfirst.perm[i] == 0xff)
43759 dfirst.perm[i] = i;
43761 if (!d->testing_p)
43762 dfirst.target = gen_reg_rtx (dfirst.vmode);
43764 start_sequence ();
43765 ok = expand_vec_perm_1 (&dfirst);
43766 seq = get_insns ();
43767 end_sequence ();
43769 if (!ok)
43770 return false;
43772 if (d->testing_p)
43773 return true;
43775 emit_insn (seq);
43777 dsecond = *d;
43778 dsecond.op0 = dfirst.target;
43779 dsecond.op1 = dfirst.target;
43780 dsecond.one_operand_p = true;
43781 dsecond.target = gen_reg_rtx (dsecond.vmode);
43782 for (i = 0; i < nelt; i++)
43783 dsecond.perm[i] = i ^ nelt2;
43785 ok = expand_vec_perm_1 (&dsecond);
43786 gcc_assert (ok);
43788 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43789 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43790 return true;
43793 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43794 permutation using two vperm2f128, followed by a vshufpd insn blending
43795 the two vectors together. */
43797 static bool
43798 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43800 struct expand_vec_perm_d dfirst, dsecond, dthird;
43801 bool ok;
43803 if (!TARGET_AVX || (d->vmode != V4DFmode))
43804 return false;
43806 if (d->testing_p)
43807 return true;
43809 dfirst = *d;
43810 dsecond = *d;
43811 dthird = *d;
43813 dfirst.perm[0] = (d->perm[0] & ~1);
43814 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43815 dfirst.perm[2] = (d->perm[2] & ~1);
43816 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43817 dsecond.perm[0] = (d->perm[1] & ~1);
43818 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43819 dsecond.perm[2] = (d->perm[3] & ~1);
43820 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43821 dthird.perm[0] = (d->perm[0] % 2);
43822 dthird.perm[1] = (d->perm[1] % 2) + 4;
43823 dthird.perm[2] = (d->perm[2] % 2) + 2;
43824 dthird.perm[3] = (d->perm[3] % 2) + 6;
43826 dfirst.target = gen_reg_rtx (dfirst.vmode);
43827 dsecond.target = gen_reg_rtx (dsecond.vmode);
43828 dthird.op0 = dfirst.target;
43829 dthird.op1 = dsecond.target;
43830 dthird.one_operand_p = false;
43832 canonicalize_perm (&dfirst);
43833 canonicalize_perm (&dsecond);
43835 ok = expand_vec_perm_1 (&dfirst)
43836 && expand_vec_perm_1 (&dsecond)
43837 && expand_vec_perm_1 (&dthird);
43839 gcc_assert (ok);
43841 return true;
43844 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43845 permutation with two pshufb insns and an ior. We should have already
43846 failed all two instruction sequences. */
43848 static bool
43849 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43851 rtx rperm[2][16], vperm, l, h, op, m128;
43852 unsigned int i, nelt, eltsz;
43854 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43855 return false;
43856 gcc_assert (!d->one_operand_p);
43858 if (d->testing_p)
43859 return true;
43861 nelt = d->nelt;
43862 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43864 /* Generate two permutation masks. If the required element is within
43865 the given vector, it is shuffled into the proper lane. If the required
43866 element is in the other vector, force a zero into the lane by setting
43867 bit 7 in the permutation mask. */
43868 m128 = GEN_INT (-128);
43869 for (i = 0; i < nelt; ++i)
43871 unsigned j, e = d->perm[i];
43872 unsigned which = (e >= nelt);
43873 if (e >= nelt)
43874 e -= nelt;
43876 for (j = 0; j < eltsz; ++j)
43878 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43879 rperm[1-which][i*eltsz + j] = m128;
43883 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43884 vperm = force_reg (V16QImode, vperm);
43886 l = gen_reg_rtx (V16QImode);
43887 op = gen_lowpart (V16QImode, d->op0);
43888 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43890 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43891 vperm = force_reg (V16QImode, vperm);
43893 h = gen_reg_rtx (V16QImode);
43894 op = gen_lowpart (V16QImode, d->op1);
43895 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43897 op = d->target;
43898 if (d->vmode != V16QImode)
43899 op = gen_reg_rtx (V16QImode);
43900 emit_insn (gen_iorv16qi3 (op, l, h));
43901 if (op != d->target)
43902 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43904 return true;
43907 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
43908 with two vpshufb insns, vpermq and vpor. We should have already failed
43909 all two or three instruction sequences. */
43911 static bool
43912 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
43914 rtx rperm[2][32], vperm, l, h, hp, op, m128;
43915 unsigned int i, nelt, eltsz;
43917 if (!TARGET_AVX2
43918 || !d->one_operand_p
43919 || (d->vmode != V32QImode && d->vmode != V16HImode))
43920 return false;
43922 if (d->testing_p)
43923 return true;
43925 nelt = d->nelt;
43926 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43928 /* Generate two permutation masks. If the required element is within
43929 the same lane, it is shuffled in. If the required element is from the
43930 other lane, force a zero by setting bit 7 in the permutation mask.
43931 In the other mask, an element is non-negative when it is requested
43932 from the other lane, but it is also moved to the other lane,
43933 so that the result of vpshufb can have the two V2TImode halves
43934 swapped. */
43935 m128 = GEN_INT (-128);
43936 for (i = 0; i < nelt; ++i)
43938 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43939 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
43941 for (j = 0; j < eltsz; ++j)
43943 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
43944 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
43948 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43949 vperm = force_reg (V32QImode, vperm);
43951 h = gen_reg_rtx (V32QImode);
43952 op = gen_lowpart (V32QImode, d->op0);
43953 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43955 /* Swap the 128-bit lanes of h into hp. */
43956 hp = gen_reg_rtx (V4DImode);
43957 op = gen_lowpart (V4DImode, h);
43958 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
43959 const1_rtx));
43961 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43962 vperm = force_reg (V32QImode, vperm);
43964 l = gen_reg_rtx (V32QImode);
43965 op = gen_lowpart (V32QImode, d->op0);
43966 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
43968 op = d->target;
43969 if (d->vmode != V32QImode)
43970 op = gen_reg_rtx (V32QImode);
43971 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
43972 if (op != d->target)
43973 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43975 return true;
43978 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
43979 and extract-odd permutations of two V32QImode and V16QImode operand
43980 with two vpshufb insns, vpor and vpermq. We should have already
43981 failed all two or three instruction sequences. */
43983 static bool
43984 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
43986 rtx rperm[2][32], vperm, l, h, ior, op, m128;
43987 unsigned int i, nelt, eltsz;
43989 if (!TARGET_AVX2
43990 || d->one_operand_p
43991 || (d->vmode != V32QImode && d->vmode != V16HImode))
43992 return false;
43994 for (i = 0; i < d->nelt; ++i)
43995 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
43996 return false;
43998 if (d->testing_p)
43999 return true;
44001 nelt = d->nelt;
44002 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44004 /* Generate two permutation masks. In the first permutation mask
44005 the first quarter will contain indexes for the first half
44006 of the op0, the second quarter will contain bit 7 set, third quarter
44007 will contain indexes for the second half of the op0 and the
44008 last quarter bit 7 set. In the second permutation mask
44009 the first quarter will contain bit 7 set, the second quarter
44010 indexes for the first half of the op1, the third quarter bit 7 set
44011 and last quarter indexes for the second half of the op1.
44012 I.e. the first mask e.g. for V32QImode extract even will be:
44013 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44014 (all values masked with 0xf except for -128) and second mask
44015 for extract even will be
44016 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44017 m128 = GEN_INT (-128);
44018 for (i = 0; i < nelt; ++i)
44020 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44021 unsigned which = d->perm[i] >= nelt;
44022 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44024 for (j = 0; j < eltsz; ++j)
44026 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44027 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44031 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44032 vperm = force_reg (V32QImode, vperm);
44034 l = gen_reg_rtx (V32QImode);
44035 op = gen_lowpart (V32QImode, d->op0);
44036 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44038 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44039 vperm = force_reg (V32QImode, vperm);
44041 h = gen_reg_rtx (V32QImode);
44042 op = gen_lowpart (V32QImode, d->op1);
44043 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44045 ior = gen_reg_rtx (V32QImode);
44046 emit_insn (gen_iorv32qi3 (ior, l, h));
44048 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44049 op = gen_reg_rtx (V4DImode);
44050 ior = gen_lowpart (V4DImode, ior);
44051 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44052 const1_rtx, GEN_INT (3)));
44053 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44055 return true;
44058 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
44059 and extract-odd permutations. */
44061 static bool
44062 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44064 rtx t1, t2, t3, t4, t5;
44066 switch (d->vmode)
44068 case V4DFmode:
44069 if (d->testing_p)
44070 break;
44071 t1 = gen_reg_rtx (V4DFmode);
44072 t2 = gen_reg_rtx (V4DFmode);
44074 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44075 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44076 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44078 /* Now an unpck[lh]pd will produce the result required. */
44079 if (odd)
44080 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44081 else
44082 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44083 emit_insn (t3);
44084 break;
44086 case V8SFmode:
44088 int mask = odd ? 0xdd : 0x88;
44090 if (d->testing_p)
44091 break;
44092 t1 = gen_reg_rtx (V8SFmode);
44093 t2 = gen_reg_rtx (V8SFmode);
44094 t3 = gen_reg_rtx (V8SFmode);
44096 /* Shuffle within the 128-bit lanes to produce:
44097 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44098 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44099 GEN_INT (mask)));
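/* Informal aside on the immediate used above: shufps selects two elements
   from each source per 128-bit lane, two bits per element.  0x88 encodes
   the fields { 0, 2, 0, 2 }, i.e. elements 0 and 2 of each source (the
   evens within a lane), while 0xdd encodes { 1, 3, 1, 3 }, the odds.  */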
44101 /* Shuffle the lanes around to produce:
44102 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44103 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44104 GEN_INT (0x3)));
44106 /* Shuffle within the 128-bit lanes to produce:
44107 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44108 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44110 /* Shuffle within the 128-bit lanes to produce:
44111 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44112 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44114 /* Shuffle the lanes around to produce:
44115 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44116 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44117 GEN_INT (0x20)));
44119 break;
44121 case V2DFmode:
44122 case V4SFmode:
44123 case V2DImode:
44124 case V4SImode:
44125 /* These are always directly implementable by expand_vec_perm_1. */
44126 gcc_unreachable ();
44128 case V8HImode:
44129 if (TARGET_SSSE3)
44130 return expand_vec_perm_pshufb2 (d);
44131 else
44133 if (d->testing_p)
44134 break;
44135 /* We need 2*log2(N)-1 operations to achieve odd/even
44136 with interleave. */
44137 t1 = gen_reg_rtx (V8HImode);
44138 t2 = gen_reg_rtx (V8HImode);
44139 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44140 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44141 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44142 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44143 if (odd)
44144 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44145 else
44146 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44147 emit_insn (t3);
44149 break;
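/* Informal trace of the V8HImode interleave sequence above, with
   op0 = { 0 1 2 3 4 5 6 7 } and op1 = { 8 9 a b c d e f }:
   the low/high interleaves give { 0 8 1 9 2 a 3 b } and { 4 c 5 d 6 e 7 f };
   interleaving those again gives { 0 4 8 c 1 5 9 d } and { 2 6 a e 3 7 b f };
   a final low resp. high interleave of these two yields
   { 0 2 4 6 8 a c e } (even) resp. { 1 3 5 7 9 b d f } (odd),
   i.e. 2*log2(8)-1 == 5 interleaves in total.  */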
44151 case V16QImode:
44152 if (TARGET_SSSE3)
44153 return expand_vec_perm_pshufb2 (d);
44154 else
44156 if (d->testing_p)
44157 break;
44158 t1 = gen_reg_rtx (V16QImode);
44159 t2 = gen_reg_rtx (V16QImode);
44160 t3 = gen_reg_rtx (V16QImode);
44161 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44162 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44163 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44164 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44165 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44166 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44167 if (odd)
44168 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44169 else
44170 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44171 emit_insn (t3);
44173 break;
44175 case V16HImode:
44176 case V32QImode:
44177 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44179 case V4DImode:
44180 if (!TARGET_AVX2)
44182 struct expand_vec_perm_d d_copy = *d;
44183 d_copy.vmode = V4DFmode;
44184 if (d->testing_p)
44185 d_copy.target = gen_lowpart (V4DFmode, d->target);
44186 else
44187 d_copy.target = gen_reg_rtx (V4DFmode);
44188 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44189 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44190 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44192 if (!d->testing_p)
44193 emit_move_insn (d->target,
44194 gen_lowpart (V4DImode, d_copy.target));
44195 return true;
44197 return false;
44200 if (d->testing_p)
44201 break;
44203 t1 = gen_reg_rtx (V4DImode);
44204 t2 = gen_reg_rtx (V4DImode);
44206 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44207 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44208 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44210 /* Now a vpunpck[lh]qdq will produce the result required.  */
44211 if (odd)
44212 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44213 else
44214 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44215 emit_insn (t3);
44216 break;
44218 case V8SImode:
44219 if (!TARGET_AVX2)
44221 struct expand_vec_perm_d d_copy = *d;
44222 d_copy.vmode = V8SFmode;
44223 if (d->testing_p)
44224 d_copy.target = gen_lowpart (V8SFmode, d->target);
44225 else
44226 d_copy.target = gen_reg_rtx (V8SFmode);
44227 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44228 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44229 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44231 if (!d->testing_p)
44232 emit_move_insn (d->target,
44233 gen_lowpart (V8SImode, d_copy.target));
44234 return true;
44236 return false;
44239 if (d->testing_p)
44240 break;
44242 t1 = gen_reg_rtx (V8SImode);
44243 t2 = gen_reg_rtx (V8SImode);
44244 t3 = gen_reg_rtx (V4DImode);
44245 t4 = gen_reg_rtx (V4DImode);
44246 t5 = gen_reg_rtx (V4DImode);
44248 /* Shuffle the lanes around into
44249 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44250 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44251 gen_lowpart (V4DImode, d->op1),
44252 GEN_INT (0x20)));
44253 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44254 gen_lowpart (V4DImode, d->op1),
44255 GEN_INT (0x31)));
44257 /* Swap the 2nd and 3rd position in each lane into
44258 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44259 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44260 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44261 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44262 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44264 /* Now a vpunpck[lh]qdq will produce
44265 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44266 if (odd)
44267 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44268 gen_lowpart (V4DImode, t2));
44269 else
44270 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44271 gen_lowpart (V4DImode, t2));
44272 emit_insn (t3);
44273 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44274 break;
44276 default:
44277 gcc_unreachable ();
44280 return true;
44283 /* A subroutine of ix86_expand_vec_perm_const_1.  Pattern match
44284 extract-even and extract-odd permutations. */
44286 static bool
44287 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44289 unsigned i, odd, nelt = d->nelt;
44291 odd = d->perm[0];
44292 if (odd != 0 && odd != 1)
44293 return false;
44295 for (i = 1; i < nelt; ++i)
44296 if (d->perm[i] != 2 * i + odd)
44297 return false;
44299 return expand_vec_perm_even_odd_1 (d, odd);
44302 /* A subroutine of ix86_expand_vec_perm_const_1.  Implement broadcast
44303 permutations. We assume that expand_vec_perm_1 has already failed. */
44305 static bool
44306 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44308 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44309 enum machine_mode vmode = d->vmode;
44310 unsigned char perm2[4];
44311 rtx op0 = d->op0, dest;
44312 bool ok;
44314 switch (vmode)
44316 case V4DFmode:
44317 case V8SFmode:
44318 /* These are special-cased in sse.md so that we can optionally
44319 use the vbroadcast instruction. They expand to two insns
44320 if the input happens to be in a register. */
44321 gcc_unreachable ();
44323 case V2DFmode:
44324 case V2DImode:
44325 case V4SFmode:
44326 case V4SImode:
44327 /* These are always implementable using standard shuffle patterns. */
44328 gcc_unreachable ();
44330 case V8HImode:
44331 case V16QImode:
44332 /* These can be implemented via interleave. We save one insn by
44333 stopping once we have promoted to V4SImode and then use pshufd. */
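/* Rough illustration (assumptions ours): broadcasting one byte of a
   V16QImode vector needs only two self-interleaves -- each doubles the
   width of the replicated chunk, reaching 32 bits at V4SImode -- after
   which a single pshufd replicates that chunk across the whole register.
   That is one insn fewer than the four interleaves a pure widening
   approach would take.  */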
44334 if (d->testing_p)
44335 return true;
44338 rtx dest;
44339 rtx (*gen) (rtx, rtx, rtx)
44340 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44341 : gen_vec_interleave_lowv8hi;
44343 if (elt >= nelt2)
44345 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44346 : gen_vec_interleave_highv8hi;
44347 elt -= nelt2;
44349 nelt2 /= 2;
44351 dest = gen_reg_rtx (vmode);
44352 emit_insn (gen (dest, op0, op0));
44353 vmode = get_mode_wider_vector (vmode);
44354 op0 = gen_lowpart (vmode, dest);
44356 while (vmode != V4SImode);
44358 memset (perm2, elt, 4);
44359 dest = gen_reg_rtx (V4SImode);
44360 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44361 gcc_assert (ok);
44362 if (!d->testing_p)
44363 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44364 return true;
44366 case V32QImode:
44367 case V16HImode:
44368 case V8SImode:
44369 case V4DImode:
44370 /* For AVX2, broadcasts of the first element should already have been
44371 handled by expand_vec_perm_1 via vpbroadcast* or vpermq.  */
44372 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44373 return false;
44375 default:
44376 gcc_unreachable ();
44380 /* A subroutine of ix86_expand_vec_perm_const_1.  Pattern match
44381 broadcast permutations. */
44383 static bool
44384 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44386 unsigned i, elt, nelt = d->nelt;
44388 if (!d->one_operand_p)
44389 return false;
44391 elt = d->perm[0];
44392 for (i = 1; i < nelt; ++i)
44393 if (d->perm[i] != elt)
44394 return false;
44396 return expand_vec_perm_broadcast_1 (d);
44399 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44400 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44401 all the shorter instruction sequences. */
44403 static bool
44404 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44406 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44407 unsigned int i, nelt, eltsz;
44408 bool used[4];
44410 if (!TARGET_AVX2
44411 || d->one_operand_p
44412 || (d->vmode != V32QImode && d->vmode != V16HImode))
44413 return false;
44415 if (d->testing_p)
44416 return true;
44418 nelt = d->nelt;
44419 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44421 /* Generate 4 permutation masks.  If the required element is within
44422 the same lane, it is shuffled in directly.  If the required element
44423 is from the other lane, force a zero by setting bit 7 in the
44424 permutation mask.  The companion mask has non-negative elements only
44425 where an element is requested from the other lane; such elements are
44426 also placed into the other lane of the mask, so that the two V2TImode
44427 halves of the vpshufb result can afterwards be swapped.  */
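/* Informal summary of the mask numbering used below, derived from the
   loop that follows: mask 0 holds op0 same-lane elements, mask 1 op0
   cross-lane, mask 2 op1 same-lane and mask 3 op1 cross-lane, i.e.
   which = (from op1 ? 2 : 0) + (cross-lane ? 1 : 0).  The cross-lane
   masks (1 and 3) are the ones whose vpshufb results later get their
   128-bit halves swapped by vpermq.  */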
44428 m128 = GEN_INT (-128);
44429 for (i = 0; i < 32; ++i)
44431 rperm[0][i] = m128;
44432 rperm[1][i] = m128;
44433 rperm[2][i] = m128;
44434 rperm[3][i] = m128;
44436 used[0] = false;
44437 used[1] = false;
44438 used[2] = false;
44439 used[3] = false;
44440 for (i = 0; i < nelt; ++i)
44442 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44443 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44444 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44446 for (j = 0; j < eltsz; ++j)
44447 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44448 used[which] = true;
44451 for (i = 0; i < 2; ++i)
44453 if (!used[2 * i + 1])
44455 h[i] = NULL_RTX;
44456 continue;
44458 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44459 gen_rtvec_v (32, rperm[2 * i + 1]));
44460 vperm = force_reg (V32QImode, vperm);
44461 h[i] = gen_reg_rtx (V32QImode);
44462 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44463 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44466 /* Swap the 128-bit lanes of h[X].  */
44467 for (i = 0; i < 2; ++i)
44469 if (h[i] == NULL_RTX)
44470 continue;
44471 op = gen_reg_rtx (V4DImode);
44472 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44473 const2_rtx, GEN_INT (3), const0_rtx,
44474 const1_rtx));
44475 h[i] = gen_lowpart (V32QImode, op);
44478 for (i = 0; i < 2; ++i)
44480 if (!used[2 * i])
44482 l[i] = NULL_RTX;
44483 continue;
44485 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44486 vperm = force_reg (V32QImode, vperm);
44487 l[i] = gen_reg_rtx (V32QImode);
44488 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44489 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44492 for (i = 0; i < 2; ++i)
44494 if (h[i] && l[i])
44496 op = gen_reg_rtx (V32QImode);
44497 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44498 l[i] = op;
44500 else if (h[i])
44501 l[i] = h[i];
44504 gcc_assert (l[0] && l[1]);
44505 op = d->target;
44506 if (d->vmode != V32QImode)
44507 op = gen_reg_rtx (V32QImode);
44508 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44509 if (op != d->target)
44510 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44511 return true;
44514 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44515 With all of the interface bits taken care of, perform the expansion
44516 in D and return true on success. */
44518 static bool
44519 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44521 /* Try a single instruction expansion. */
44522 if (expand_vec_perm_1 (d))
44523 return true;
44525 /* Try sequences of two instructions. */
44527 if (expand_vec_perm_pshuflw_pshufhw (d))
44528 return true;
44530 if (expand_vec_perm_palignr (d))
44531 return true;
44533 if (expand_vec_perm_interleave2 (d))
44534 return true;
44536 if (expand_vec_perm_broadcast (d))
44537 return true;
44539 if (expand_vec_perm_vpermq_perm_1 (d))
44540 return true;
44542 if (expand_vec_perm_vperm2f128 (d))
44543 return true;
44545 /* Try sequences of three instructions. */
44547 if (expand_vec_perm_2vperm2f128_vshuf (d))
44548 return true;
44550 if (expand_vec_perm_pshufb2 (d))
44551 return true;
44553 if (expand_vec_perm_interleave3 (d))
44554 return true;
44556 if (expand_vec_perm_vperm2f128_vblend (d))
44557 return true;
44559 /* Try sequences of four instructions. */
44561 if (expand_vec_perm_vpshufb2_vpermq (d))
44562 return true;
44564 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44565 return true;
44567 /* ??? Look for narrow permutations whose element orderings would
44568 allow the promotion to a wider mode. */
44570 /* ??? Look for sequences of interleave or a wider permute that place
44571 the data into the correct lanes for a half-vector shuffle like
44572 pshuf[lh]w or vpermilps. */
44574 /* ??? Look for sequences of interleave that produce the desired results.
44575 The combinatorics of punpck[lh] get pretty ugly... */
44577 if (expand_vec_perm_even_odd (d))
44578 return true;
44580 /* Even longer sequences. */
44581 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44582 return true;
44584 return false;
44587 /* If a permutation only uses one operand, make it clear. Returns true
44588 if the permutation references both operands. */
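/* Worked example (illustrative only): with nelt == 4, a selector of
   { 4, 5, 6, 7 } references only the second operand (which == 2), so it is
   folded to { 0, 1, 2, 3 } with op0 = op1; a selector of { 0, 5, 2, 7 }
   references both operands (which == 3) and is left alone unless op0 and
   op1 are the same register, in which case it is likewise reduced modulo
   nelt onto the single input.  */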
44590 static bool
44591 canonicalize_perm (struct expand_vec_perm_d *d)
44593 int i, which, nelt = d->nelt;
44595 for (i = which = 0; i < nelt; ++i)
44596 which |= (d->perm[i] < nelt ? 1 : 2);
44598 d->one_operand_p = true;
44599 switch (which)
44601 default:
44602 gcc_unreachable();
44604 case 3:
44605 if (!rtx_equal_p (d->op0, d->op1))
44607 d->one_operand_p = false;
44608 break;
44610 /* The elements of PERM do not suggest that only the first operand
44611 is used, but both operands are identical. Allow easier matching
44612 of the permutation by folding the permutation into the single
44613 input vector. */
44614 /* FALLTHRU */
44616 case 2:
44617 for (i = 0; i < nelt; ++i)
44618 d->perm[i] &= nelt - 1;
44619 d->op0 = d->op1;
44620 break;
44622 case 1:
44623 d->op1 = d->op0;
44624 break;
44627 return (which == 3);
44630 bool
44631 ix86_expand_vec_perm_const (rtx operands[4])
44633 struct expand_vec_perm_d d;
44634 unsigned char perm[MAX_VECT_LEN];
44635 int i, nelt;
44636 bool two_args;
44637 rtx sel;
44639 d.target = operands[0];
44640 d.op0 = operands[1];
44641 d.op1 = operands[2];
44642 sel = operands[3];
44644 d.vmode = GET_MODE (d.target);
44645 gcc_assert (VECTOR_MODE_P (d.vmode));
44646 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44647 d.testing_p = false;
44649 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44650 gcc_assert (XVECLEN (sel, 0) == nelt);
44651 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44653 for (i = 0; i < nelt; ++i)
44655 rtx e = XVECEXP (sel, 0, i);
44656 int ei = INTVAL (e) & (2 * nelt - 1);
44657 d.perm[i] = ei;
44658 perm[i] = ei;
44661 two_args = canonicalize_perm (&d);
44663 if (ix86_expand_vec_perm_const_1 (&d))
44664 return true;
44666 /* If the selector says both arguments are needed, but the operands are the
44667 same, the above tried to expand with one_operand_p and flattened selector.
44668 If that didn't work, retry without one_operand_p; we succeeded with that
44669 during testing. */
44670 if (two_args && d.one_operand_p)
44672 d.one_operand_p = false;
44673 memcpy (d.perm, perm, sizeof (perm));
44674 return ix86_expand_vec_perm_const_1 (&d);
44677 return false;
44680 /* Implement targetm.vectorize.vec_perm_const_ok. */
44682 static bool
44683 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44684 const unsigned char *sel)
44686 struct expand_vec_perm_d d;
44687 unsigned int i, nelt, which;
44688 bool ret;
44690 d.vmode = vmode;
44691 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44692 d.testing_p = true;
44694 /* Given sufficient ISA support we can just return true here
44695 for selected vector modes. */
44696 if (d.vmode == V16SImode || d.vmode == V16SFmode
44697 || d.vmode == V8DFmode || d.vmode == V8DImode)
44698 /* All implementable with a single vpermi2 insn. */
44699 return true;
44700 if (GET_MODE_SIZE (d.vmode) == 16)
44702 /* All implementable with a single vpperm insn. */
44703 if (TARGET_XOP)
44704 return true;
44705 /* All implementable with 2 pshufb + 1 ior. */
44706 if (TARGET_SSSE3)
44707 return true;
44708 /* All implementable with shufpd or unpck[lh]pd. */
44709 if (d.nelt == 2)
44710 return true;
44713 /* Extract the values from the vector CST into the permutation
44714 array in D. */
44715 memcpy (d.perm, sel, nelt);
44716 for (i = which = 0; i < nelt; ++i)
44718 unsigned char e = d.perm[i];
44719 gcc_assert (e < 2 * nelt);
44720 which |= (e < nelt ? 1 : 2);
44723 /* If all elements are from the second vector, fold them onto the first.  */
44724 if (which == 2)
44725 for (i = 0; i < nelt; ++i)
44726 d.perm[i] -= nelt;
44728 /* Check whether the mask can be applied to the vector type. */
44729 d.one_operand_p = (which != 3);
44731 /* Implementable with shufps or pshufd. */
44732 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44733 return true;
44735 /* Otherwise we have to go through the motions and see if we can
44736 figure out how to generate the requested permutation. */
44737 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44738 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44739 if (!d.one_operand_p)
44740 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44742 start_sequence ();
44743 ret = ix86_expand_vec_perm_const_1 (&d);
44744 end_sequence ();
44746 return ret;
44749 void
44750 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44752 struct expand_vec_perm_d d;
44753 unsigned i, nelt;
44755 d.target = targ;
44756 d.op0 = op0;
44757 d.op1 = op1;
44758 d.vmode = GET_MODE (targ);
44759 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44760 d.one_operand_p = false;
44761 d.testing_p = false;
44763 for (i = 0; i < nelt; ++i)
44764 d.perm[i] = i * 2 + odd;
44766 /* We'll either be able to implement the permutation directly... */
44767 if (expand_vec_perm_1 (&d))
44768 return;
44770 /* ... or we use the special-case patterns. */
44771 expand_vec_perm_even_odd_1 (&d, odd);
44774 static void
44775 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44777 struct expand_vec_perm_d d;
44778 unsigned i, nelt, base;
44779 bool ok;
44781 d.target = targ;
44782 d.op0 = op0;
44783 d.op1 = op1;
44784 d.vmode = GET_MODE (targ);
44785 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44786 d.one_operand_p = false;
44787 d.testing_p = false;
44789 base = high_p ? nelt / 2 : 0;
44790 for (i = 0; i < nelt / 2; ++i)
44792 d.perm[i * 2] = i + base;
44793 d.perm[i * 2 + 1] = i + base + nelt;
44796 /* Note that for AVX this isn't one instruction. */
44797 ok = ix86_expand_vec_perm_const_1 (&d);
44798 gcc_assert (ok);
44802 /* Expand a vector operation CODE for a V*QImode in terms of the
44803 same operation on V*HImode. */
44805 void
44806 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44808 enum machine_mode qimode = GET_MODE (dest);
44809 enum machine_mode himode;
44810 rtx (*gen_il) (rtx, rtx, rtx);
44811 rtx (*gen_ih) (rtx, rtx, rtx);
44812 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44813 struct expand_vec_perm_d d;
44814 bool ok, full_interleave;
44815 bool uns_p = false;
44816 int i;
44818 switch (qimode)
44820 case V16QImode:
44821 himode = V8HImode;
44822 gen_il = gen_vec_interleave_lowv16qi;
44823 gen_ih = gen_vec_interleave_highv16qi;
44824 break;
44825 case V32QImode:
44826 himode = V16HImode;
44827 gen_il = gen_avx2_interleave_lowv32qi;
44828 gen_ih = gen_avx2_interleave_highv32qi;
44829 break;
44830 default:
44831 gcc_unreachable ();
44834 op2_l = op2_h = op2;
44835 switch (code)
44837 case MULT:
44838 /* Unpack data such that we've got a source byte in each low byte of
44839 each word. We don't care what goes into the high byte of each word.
44840 Rather than trying to get zero in there, most convenient is to let
44841 it be a copy of the low byte. */
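/* Informal justification (ours, not from the original comment): only the
   low byte of each 16-bit product is kept when the halves are merged back
   below, and (A + 256*junk) * (B + 256*junk') mod 256 == A*B mod 256, so
   whatever sits in the high bytes of the unpacked operands cannot affect
   the result.  */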
44842 op2_l = gen_reg_rtx (qimode);
44843 op2_h = gen_reg_rtx (qimode);
44844 emit_insn (gen_il (op2_l, op2, op2));
44845 emit_insn (gen_ih (op2_h, op2, op2));
44846 /* FALLTHRU */
44848 op1_l = gen_reg_rtx (qimode);
44849 op1_h = gen_reg_rtx (qimode);
44850 emit_insn (gen_il (op1_l, op1, op1));
44851 emit_insn (gen_ih (op1_h, op1, op1));
44852 full_interleave = qimode == V16QImode;
44853 break;
44855 case ASHIFT:
44856 case LSHIFTRT:
44857 uns_p = true;
44858 /* FALLTHRU */
44859 case ASHIFTRT:
44860 op1_l = gen_reg_rtx (himode);
44861 op1_h = gen_reg_rtx (himode);
44862 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44863 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44864 full_interleave = true;
44865 break;
44866 default:
44867 gcc_unreachable ();
44870 /* Perform the operation. */
44871 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44872 1, OPTAB_DIRECT);
44873 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44874 1, OPTAB_DIRECT);
44875 gcc_assert (res_l && res_h);
44877 /* Merge the data back into the right place. */
44878 d.target = dest;
44879 d.op0 = gen_lowpart (qimode, res_l);
44880 d.op1 = gen_lowpart (qimode, res_h);
44881 d.vmode = qimode;
44882 d.nelt = GET_MODE_NUNITS (qimode);
44883 d.one_operand_p = false;
44884 d.testing_p = false;
44886 if (full_interleave)
44888 /* For SSE2, we used a full interleave, so the desired
44889 results are in the even elements. */
44890 for (i = 0; i < 32; ++i)
44891 d.perm[i] = i * 2;
44893 else
44895 /* For AVX, the interleave used above was not cross-lane.  So the extraction
44896 is of the even elements, but with the second and third quarters swapped.
44897 Happily, that is even one insn shorter than plain even extraction.  */
44898 for (i = 0; i < 32; ++i)
44899 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
44902 ok = ix86_expand_vec_perm_const_1 (&d);
44903 gcc_assert (ok);
44905 set_unique_reg_note (get_last_insn (), REG_EQUAL,
44906 gen_rtx_fmt_ee (code, qimode, op1, op2));
44909 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
44910 if op is CONST_VECTOR with all odd elements equal to their
44911 preceding element. */
44913 static bool
44914 const_vector_equal_evenodd_p (rtx op)
44916 enum machine_mode mode = GET_MODE (op);
44917 int i, nunits = GET_MODE_NUNITS (mode);
44918 if (GET_CODE (op) != CONST_VECTOR
44919 || nunits != CONST_VECTOR_NUNITS (op))
44920 return false;
44921 for (i = 0; i < nunits; i += 2)
44922 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
44923 return false;
44924 return true;
44927 void
44928 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
44929 bool uns_p, bool odd_p)
44931 enum machine_mode mode = GET_MODE (op1);
44932 enum machine_mode wmode = GET_MODE (dest);
44933 rtx x;
44934 rtx orig_op1 = op1, orig_op2 = op2;
44936 if (!nonimmediate_operand (op1, mode))
44937 op1 = force_reg (mode, op1);
44938 if (!nonimmediate_operand (op2, mode))
44939 op2 = force_reg (mode, op2);
44941 /* We only play even/odd games with vectors of SImode. */
44942 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
44944 /* If we're looking for the odd results, shift those members down to
44945 the even slots.  For some CPUs this is faster than a PSHUFD.  */
44946 if (odd_p)
44948 /* For XOP use vpmacsdqh, but only for smult, as it is only
44949 signed. */
44950 if (TARGET_XOP && mode == V4SImode && !uns_p)
44952 x = force_reg (wmode, CONST0_RTX (wmode));
44953 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
44954 return;
44957 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
44958 if (!const_vector_equal_evenodd_p (orig_op1))
44959 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
44960 x, NULL, 1, OPTAB_DIRECT);
44961 if (!const_vector_equal_evenodd_p (orig_op2))
44962 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
44963 x, NULL, 1, OPTAB_DIRECT);
44964 op1 = gen_lowpart (mode, op1);
44965 op2 = gen_lowpart (mode, op2);
44968 if (mode == V16SImode)
44970 if (uns_p)
44971 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
44972 else
44973 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
44975 else if (mode == V8SImode)
44977 if (uns_p)
44978 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
44979 else
44980 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
44982 else if (uns_p)
44983 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
44984 else if (TARGET_SSE4_1)
44985 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
44986 else
44988 rtx s1, s2, t0, t1, t2;
44990 /* The easiest way to implement this without PMULDQ is to go through
44991 the motions as if we were performing a full 64-bit multiply, except
44992 that we need to do less shuffling of the elements.  */
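/* Sketch of the algebra used below (informal): writing the signed 32-bit
   inputs as a = A - 2^32*sa and b = B - 2^32*sb, with A, B the unsigned
   values and sa, sb the sign bits, the 64-bit product satisfies
       a * b == A*B - 2^32 * (sa*B + sb*A)   (mod 2^64).
   The comparisons below produce all-ones masks exactly where sa resp. sb
   is set; the widening unsigned multiplies then yield (2^32-1)*B resp.
   (2^32-1)*A in those slots, and shifting their sum left by 32 bits makes
   it congruent to -2^32*(sa*B + sb*A), which is finally added to the
   unsigned low product A*B.  */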
44994 /* Compute the sign-extension, aka highparts, of the two operands. */
44995 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
44996 op1, pc_rtx, pc_rtx);
44997 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
44998 op2, pc_rtx, pc_rtx);
45000 /* Multiply LO(A) * HI(B), and vice-versa. */
45001 t1 = gen_reg_rtx (wmode);
45002 t2 = gen_reg_rtx (wmode);
45003 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45004 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45006 /* Multiply LO(A) * LO(B). */
45007 t0 = gen_reg_rtx (wmode);
45008 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45010 /* Combine and shift the highparts into place. */
45011 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45012 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45013 1, OPTAB_DIRECT);
45015 /* Combine high and low parts. */
45016 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45017 return;
45019 emit_insn (x);
45022 void
45023 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45024 bool uns_p, bool high_p)
45026 enum machine_mode wmode = GET_MODE (dest);
45027 enum machine_mode mode = GET_MODE (op1);
45028 rtx t1, t2, t3, t4, mask;
45030 switch (mode)
45032 case V4SImode:
45033 t1 = gen_reg_rtx (mode);
45034 t2 = gen_reg_rtx (mode);
45035 if (TARGET_XOP && !uns_p)
45037 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45038 shuffle the elements once so that all elements are in the right
45039 place for immediate use: { A C B D }. */
45040 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45041 const1_rtx, GEN_INT (3)));
45042 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45043 const1_rtx, GEN_INT (3)));
45045 else
45047 /* Put the elements into place for the multiply. */
45048 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45049 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45050 high_p = false;
45052 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45053 break;
45055 case V8SImode:
45056 /* Shuffle the elements between the lanes. After this we
45057 have { A B E F | C D G H } for each operand. */
45058 t1 = gen_reg_rtx (V4DImode);
45059 t2 = gen_reg_rtx (V4DImode);
45060 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45061 const0_rtx, const2_rtx,
45062 const1_rtx, GEN_INT (3)));
45063 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45064 const0_rtx, const2_rtx,
45065 const1_rtx, GEN_INT (3)));
45067 /* Shuffle the elements within the lanes. After this we
45068 have { A A B B | C C D D } or { E E F F | G G H H }. */
45069 t3 = gen_reg_rtx (V8SImode);
45070 t4 = gen_reg_rtx (V8SImode);
45071 mask = GEN_INT (high_p
45072 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45073 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45074 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45075 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
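/* Note (informal): the pshufd immediate built above works out to 0x50 for
   the low halves (2-bit fields { 0, 0, 1, 1 }) and 0xfa for the high
   halves (fields { 2, 2, 3, 3 }), which is what duplicates each 32-bit
   element within its lane as described in the comment.  */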
45077 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45078 break;
45080 case V8HImode:
45081 case V16HImode:
45082 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45083 uns_p, OPTAB_DIRECT);
45084 t2 = expand_binop (mode,
45085 uns_p ? umul_highpart_optab : smul_highpart_optab,
45086 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45087 gcc_assert (t1 && t2);
45089 t3 = gen_reg_rtx (mode);
45090 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45091 emit_move_insn (dest, gen_lowpart (wmode, t3));
45092 break;
45094 case V16QImode:
45095 case V32QImode:
45096 t1 = gen_reg_rtx (wmode);
45097 t2 = gen_reg_rtx (wmode);
45098 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45099 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45101 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45102 break;
45104 default:
45105 gcc_unreachable ();
45109 void
45110 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45112 rtx res_1, res_2, res_3, res_4;
45114 res_1 = gen_reg_rtx (V4SImode);
45115 res_2 = gen_reg_rtx (V4SImode);
45116 res_3 = gen_reg_rtx (V2DImode);
45117 res_4 = gen_reg_rtx (V2DImode);
45118 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45119 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45121 /* Move the results in element 2 down to element 1; we don't care
45122 what goes in elements 2 and 3. Then we can merge the parts
45123 back together with an interleave.
45125 Note that two other sequences were tried:
45126 (1) Use interleaves at the start instead of psrldq, which allows
45127 us to use a single shufps to merge things back at the end.
45128 (2) Use shufps here to combine the two vectors, then pshufd to
45129 put the elements in the correct order.
45130 In both cases the cost of the reformatting stall was too high
45131 and the overall sequence slower. */
45133 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45134 const0_rtx, const2_rtx,
45135 const0_rtx, const0_rtx));
45136 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45137 const0_rtx, const2_rtx,
45138 const0_rtx, const0_rtx));
45139 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45141 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45144 void
45145 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45147 enum machine_mode mode = GET_MODE (op0);
45148 rtx t1, t2, t3, t4, t5, t6;
45150 if (TARGET_XOP && mode == V2DImode)
45152 /* op1: A,B,C,D, op2: E,F,G,H */
45153 op1 = gen_lowpart (V4SImode, op1);
45154 op2 = gen_lowpart (V4SImode, op2);
45156 t1 = gen_reg_rtx (V4SImode);
45157 t2 = gen_reg_rtx (V4SImode);
45158 t3 = gen_reg_rtx (V2DImode);
45159 t4 = gen_reg_rtx (V2DImode);
45161 /* t1: B,A,D,C */
45162 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45163 GEN_INT (1),
45164 GEN_INT (0),
45165 GEN_INT (3),
45166 GEN_INT (2)));
45168 /* t2: (B*E),(A*F),(D*G),(C*H) */
45169 emit_insn (gen_mulv4si3 (t2, t1, op2));
45171 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45172 emit_insn (gen_xop_phadddq (t3, t2));
45174 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45175 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45177 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
45178 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
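/* Informal restatement of the schoolbook identity that both the XOP
   sequence above and the generic sequence below rely on: with one
   element written as A*2^32 + B and the other as E*2^32 + F (A, E the
   high and B, F the low unsigned 32-bit halves), the low 64 bits of the
   product are  ((A*F + B*E) << 32) + B*F   (mod 2^64).  */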
45180 else
45182 enum machine_mode nmode;
45183 rtx (*umul) (rtx, rtx, rtx);
45185 if (mode == V2DImode)
45187 umul = gen_vec_widen_umult_even_v4si;
45188 nmode = V4SImode;
45190 else if (mode == V4DImode)
45192 umul = gen_vec_widen_umult_even_v8si;
45193 nmode = V8SImode;
45195 else if (mode == V8DImode)
45197 umul = gen_vec_widen_umult_even_v16si;
45198 nmode = V16SImode;
45200 else
45201 gcc_unreachable ();
45204 /* Multiply low parts. */
45205 t1 = gen_reg_rtx (mode);
45206 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45208 /* Shift input vectors right 32 bits so we can multiply high parts. */
45209 t6 = GEN_INT (32);
45210 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45211 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45213 /* Multiply high parts by low parts. */
45214 t4 = gen_reg_rtx (mode);
45215 t5 = gen_reg_rtx (mode);
45216 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45217 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45219 /* Combine and shift the highparts back. */
45220 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45221 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45223 /* Combine high and low parts. */
45224 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45227 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45228 gen_rtx_MULT (mode, op1, op2));
45231 /* Calculate integer abs() using only SSE2 instructions. */
45233 void
45234 ix86_expand_sse2_abs (rtx target, rtx input)
45236 enum machine_mode mode = GET_MODE (target);
45237 rtx tmp0, tmp1, x;
45239 switch (mode)
45241 /* For 32-bit signed integer X, the best way to calculate the absolute
45242 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
45243 case V4SImode:
45244 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45245 GEN_INT (GET_MODE_BITSIZE
45246 (GET_MODE_INNER (mode)) - 1),
45247 NULL, 0, OPTAB_DIRECT);
45248 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45249 NULL, 0, OPTAB_DIRECT);
45250 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45251 target, 0, OPTAB_DIRECT);
45252 break;
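/* Informal check of the identity above: for X = -5 the arithmetic shift
   gives M = -1 (all ones), X ^ M == 4 and 4 - (-1) == 5; for X = 5 the
   shift gives M = 0 and (X ^ 0) - 0 == 5, so both signs come out right.  */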
45254 /* For 16-bit signed integer X, the best way to calculate the absolute
45255 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45256 case V8HImode:
45257 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45259 x = expand_simple_binop (mode, SMAX, tmp0, input,
45260 target, 0, OPTAB_DIRECT);
45261 break;
45263 /* For 8-bit signed integer X, the best way to calculate the absolute
45264 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45265 as SSE2 provides the PMINUB insn. */
45266 case V16QImode:
45267 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45269 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45270 target, 0, OPTAB_DIRECT);
45271 break;
45273 default:
45274 gcc_unreachable ();
45277 if (x != target)
45278 emit_move_insn (target, x);
45281 /* Expand an insert into a vector register through pinsr insn.
45282 Return true if successful. */
45284 bool
45285 ix86_expand_pinsr (rtx *operands)
45287 rtx dst = operands[0];
45288 rtx src = operands[3];
45290 unsigned int size = INTVAL (operands[1]);
45291 unsigned int pos = INTVAL (operands[2]);
45293 if (GET_CODE (dst) == SUBREG)
45295 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45296 dst = SUBREG_REG (dst);
45299 if (GET_CODE (src) == SUBREG)
45300 src = SUBREG_REG (src);
45302 switch (GET_MODE (dst))
45304 case V16QImode:
45305 case V8HImode:
45306 case V4SImode:
45307 case V2DImode:
45309 enum machine_mode srcmode, dstmode;
45310 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45312 srcmode = mode_for_size (size, MODE_INT, 0);
45314 switch (srcmode)
45316 case QImode:
45317 if (!TARGET_SSE4_1)
45318 return false;
45319 dstmode = V16QImode;
45320 pinsr = gen_sse4_1_pinsrb;
45321 break;
45323 case HImode:
45324 if (!TARGET_SSE2)
45325 return false;
45326 dstmode = V8HImode;
45327 pinsr = gen_sse2_pinsrw;
45328 break;
45330 case SImode:
45331 if (!TARGET_SSE4_1)
45332 return false;
45333 dstmode = V4SImode;
45334 pinsr = gen_sse4_1_pinsrd;
45335 break;
45337 case DImode:
45338 gcc_assert (TARGET_64BIT);
45339 if (!TARGET_SSE4_1)
45340 return false;
45341 dstmode = V2DImode;
45342 pinsr = gen_sse4_1_pinsrq;
45343 break;
45345 default:
45346 return false;
45349 rtx d = dst;
45350 if (GET_MODE (dst) != dstmode)
45351 d = gen_reg_rtx (dstmode);
45352 src = gen_lowpart (srcmode, src);
45354 pos /= size;
45356 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45357 GEN_INT (1 << pos)));
45358 if (d != dst)
45359 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45360 return true;
45363 default:
45364 return false;
45368 /* This function returns the calling-ABI-specific va_list type node,
45369 i.e. the va_list type specific to FNDECL.  */
45371 static tree
45372 ix86_fn_abi_va_list (tree fndecl)
45374 if (!TARGET_64BIT)
45375 return va_list_type_node;
45376 gcc_assert (fndecl != NULL_TREE);
45378 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45379 return ms_va_list_type_node;
45380 else
45381 return sysv_va_list_type_node;
45384 /* Returns the canonical va_list type specified by TYPE. If there
45385 is no valid TYPE provided, it returns NULL_TREE.  */
45387 static tree
45388 ix86_canonical_va_list_type (tree type)
45390 tree wtype, htype;
45392 /* Resolve references and pointers to va_list type. */
45393 if (TREE_CODE (type) == MEM_REF)
45394 type = TREE_TYPE (type);
45395 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45396 type = TREE_TYPE (type);
45397 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45398 type = TREE_TYPE (type);
45400 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45402 wtype = va_list_type_node;
45403 gcc_assert (wtype != NULL_TREE);
45404 htype = type;
45405 if (TREE_CODE (wtype) == ARRAY_TYPE)
45407 /* If va_list is an array type, the argument may have decayed
45408 to a pointer type, e.g. by being passed to another function.
45409 In that case, unwrap both types so that we can compare the
45410 underlying records. */
45411 if (TREE_CODE (htype) == ARRAY_TYPE
45412 || POINTER_TYPE_P (htype))
45414 wtype = TREE_TYPE (wtype);
45415 htype = TREE_TYPE (htype);
45418 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45419 return va_list_type_node;
45420 wtype = sysv_va_list_type_node;
45421 gcc_assert (wtype != NULL_TREE);
45422 htype = type;
45423 if (TREE_CODE (wtype) == ARRAY_TYPE)
45425 /* If va_list is an array type, the argument may have decayed
45426 to a pointer type, e.g. by being passed to another function.
45427 In that case, unwrap both types so that we can compare the
45428 underlying records. */
45429 if (TREE_CODE (htype) == ARRAY_TYPE
45430 || POINTER_TYPE_P (htype))
45432 wtype = TREE_TYPE (wtype);
45433 htype = TREE_TYPE (htype);
45436 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45437 return sysv_va_list_type_node;
45438 wtype = ms_va_list_type_node;
45439 gcc_assert (wtype != NULL_TREE);
45440 htype = type;
45441 if (TREE_CODE (wtype) == ARRAY_TYPE)
45443 /* If va_list is an array type, the argument may have decayed
45444 to a pointer type, e.g. by being passed to another function.
45445 In that case, unwrap both types so that we can compare the
45446 underlying records. */
45447 if (TREE_CODE (htype) == ARRAY_TYPE
45448 || POINTER_TYPE_P (htype))
45450 wtype = TREE_TYPE (wtype);
45451 htype = TREE_TYPE (htype);
45454 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45455 return ms_va_list_type_node;
45456 return NULL_TREE;
45458 return std_canonical_va_list_type (type);
45461 /* Iterate through the target-specific builtin types for va_list.
45462 IDX denotes the iterator, *PTREE is set to the result type of
45463 the va_list builtin, and *PNAME to its internal type.
45464 Returns zero if there is no element for this index, otherwise
45465 IDX should be increased upon the next call.
45466 Note, do not iterate a base builtin's name like __builtin_va_list.
45467 Used from c_common_nodes_and_builtins. */
45469 static int
45470 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45472 if (TARGET_64BIT)
45474 switch (idx)
45476 default:
45477 break;
45479 case 0:
45480 *ptree = ms_va_list_type_node;
45481 *pname = "__builtin_ms_va_list";
45482 return 1;
45484 case 1:
45485 *ptree = sysv_va_list_type_node;
45486 *pname = "__builtin_sysv_va_list";
45487 return 1;
45491 return 0;
45494 #undef TARGET_SCHED_DISPATCH
45495 #define TARGET_SCHED_DISPATCH has_dispatch
45496 #undef TARGET_SCHED_DISPATCH_DO
45497 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45498 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45499 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45500 #undef TARGET_SCHED_REORDER
45501 #define TARGET_SCHED_REORDER ix86_sched_reorder
45502 #undef TARGET_SCHED_ADJUST_PRIORITY
45503 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45504 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45505 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45506 ix86_dependencies_evaluation_hook
45508 /* The size of the dispatch window is the total number of bytes of
45509 object code allowed in a window. */
45510 #define DISPATCH_WINDOW_SIZE 16
45512 /* Number of dispatch windows considered for scheduling. */
45513 #define MAX_DISPATCH_WINDOWS 3
45515 /* Maximum number of instructions in a window. */
45516 #define MAX_INSN 4
45518 /* Maximum number of immediate operands in a window. */
45519 #define MAX_IMM 4
45521 /* Maximum number of immediate bits allowed in a window. */
45522 #define MAX_IMM_SIZE 128
45524 /* Maximum number of 32 bit immediates allowed in a window. */
45525 #define MAX_IMM_32 4
45527 /* Maximum number of 64 bit immediates allowed in a window. */
45528 #define MAX_IMM_64 2
45530 /* Maximum total of loads or prefetches allowed in a window. */
45531 #define MAX_LOAD 2
45533 /* Maximum total of stores allowed in a window. */
45534 #define MAX_STORE 1
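/* Informal summary of the model (assumptions ours, derived from the checks
   further below): the scheduler tracks at most two such windows at a time,
   each holding up to MAX_INSN instructions, and keeps their combined
   object-code size at or below
   MAX_DISPATCH_WINDOWS * DISPATCH_WINDOW_SIZE == 48 bytes.  */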
45536 #undef BIG
45537 #define BIG 100
45540 /* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
45541 enum dispatch_group {
45542 disp_no_group = 0,
45543 disp_load,
45544 disp_store,
45545 disp_load_store,
45546 disp_prefetch,
45547 disp_imm,
45548 disp_imm_32,
45549 disp_imm_64,
45550 disp_branch,
45551 disp_cmp,
45552 disp_jcc,
45553 disp_last
45556 /* Number of allowable groups in a dispatch window. It is an array
45557 indexed by dispatch_group enum. 100 is used as a big number,
45558 because the number of these kinds of operations does not have any
45559 effect on the dispatch window, but we need them for other reasons in
45560 the table. */
45561 static unsigned int num_allowable_groups[disp_last] = {
45562 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45565 char group_name[disp_last + 1][16] = {
45566 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45567 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45568 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45571 /* Instruction path. */
45572 enum insn_path {
45573 no_path = 0,
45574 path_single, /* Single micro op. */
45575 path_double, /* Double micro op. */
45576 path_multi, /* Instructions with more than 2 micro ops.  */
45577 last_path
45580 /* sched_insn_info defines a window to the instructions scheduled in
45581 the basic block. It contains a pointer to the insn_info table and
45582 the instruction scheduled.
45584 Windows are allocated for each basic block and are linked
45585 together. */
45586 typedef struct sched_insn_info_s {
45587 rtx insn;
45588 enum dispatch_group group;
45589 enum insn_path path;
45590 int byte_len;
45591 int imm_bytes;
45592 } sched_insn_info;
45594 /* Linked list of dispatch windows.  This is a two-way list of
45595 dispatch windows of a basic block. It contains information about
45596 the number of uops in the window and the total number of
45597 instructions and of bytes in the object code for this dispatch
45598 window. */
45599 typedef struct dispatch_windows_s {
45600 int num_insn; /* Number of insn in the window. */
45601 int num_uops; /* Number of uops in the window. */
45602 int window_size; /* Number of bytes in the window. */
45603 int window_num; /* Window number, either 0 or 1.  */
45604 int num_imm; /* Number of immediates in an insn. */
45605 int num_imm_32; /* Number of 32 bit immediates in an insn. */
45606 int num_imm_64; /* Number of 64 bit immediates in an insn. */
45607 int imm_size; /* Total immediates in the window. */
45608 int num_loads; /* Total memory loads in the window. */
45609 int num_stores; /* Total memory stores in the window. */
45610 int violation; /* Violation exists in window. */
45611 sched_insn_info *window; /* Pointer to the window. */
45612 struct dispatch_windows_s *next;
45613 struct dispatch_windows_s *prev;
45614 } dispatch_windows;
45616 /* Immediate values used in an insn.  */
45617 typedef struct imm_info_s
45619 int imm;
45620 int imm32;
45621 int imm64;
45622 } imm_info;
45624 static dispatch_windows *dispatch_window_list;
45625 static dispatch_windows *dispatch_window_list1;
45627 /* Get dispatch group of insn. */
45629 static enum dispatch_group
45630 get_mem_group (rtx insn)
45632 enum attr_memory memory;
45634 if (INSN_CODE (insn) < 0)
45635 return disp_no_group;
45636 memory = get_attr_memory (insn);
45637 if (memory == MEMORY_STORE)
45638 return disp_store;
45640 if (memory == MEMORY_LOAD)
45641 return disp_load;
45643 if (memory == MEMORY_BOTH)
45644 return disp_load_store;
45646 return disp_no_group;
45649 /* Return true if insn is a compare instruction. */
45651 static bool
45652 is_cmp (rtx insn)
45654 enum attr_type type;
45656 type = get_attr_type (insn);
45657 return (type == TYPE_TEST
45658 || type == TYPE_ICMP
45659 || type == TYPE_FCMP
45660 || GET_CODE (PATTERN (insn)) == COMPARE);
45663 /* Return true if a dispatch violation was encountered.  */
45665 static bool
45666 dispatch_violation (void)
45668 if (dispatch_window_list->next)
45669 return dispatch_window_list->next->violation;
45670 return dispatch_window_list->violation;
45673 /* Return true if insn is a branch instruction. */
45675 static bool
45676 is_branch (rtx insn)
45678 return (CALL_P (insn) || JUMP_P (insn));
45681 /* Return true if insn is a prefetch instruction. */
45683 static bool
45684 is_prefetch (rtx insn)
45686 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45689 /* This function initializes a dispatch window and the list container holding a
45690 pointer to the window. */
45692 static void
45693 init_window (int window_num)
45695 int i;
45696 dispatch_windows *new_list;
45698 if (window_num == 0)
45699 new_list = dispatch_window_list;
45700 else
45701 new_list = dispatch_window_list1;
45703 new_list->num_insn = 0;
45704 new_list->num_uops = 0;
45705 new_list->window_size = 0;
45706 new_list->next = NULL;
45707 new_list->prev = NULL;
45708 new_list->window_num = window_num;
45709 new_list->num_imm = 0;
45710 new_list->num_imm_32 = 0;
45711 new_list->num_imm_64 = 0;
45712 new_list->imm_size = 0;
45713 new_list->num_loads = 0;
45714 new_list->num_stores = 0;
45715 new_list->violation = false;
45717 for (i = 0; i < MAX_INSN; i++)
45719 new_list->window[i].insn = NULL;
45720 new_list->window[i].group = disp_no_group;
45721 new_list->window[i].path = no_path;
45722 new_list->window[i].byte_len = 0;
45723 new_list->window[i].imm_bytes = 0;
45725 return;
45728 /* This function allocates and initializes a dispatch window and the
45729 list container holding a pointer to the window. */
45731 static dispatch_windows *
45732 allocate_window (void)
45734 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45735 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45737 return new_list;
45740 /* This routine initializes the dispatch scheduling information. It
45741 initiates building dispatch scheduler tables and constructs the
45742 first dispatch window. */
45744 static void
45745 init_dispatch_sched (void)
45747 /* Allocate a dispatch list and a window. */
45748 dispatch_window_list = allocate_window ();
45749 dispatch_window_list1 = allocate_window ();
45750 init_window (0);
45751 init_window (1);
45754 /* This function returns true if a branch is detected. End of a basic block
45755 does not have to be a branch, but here we assume only branches end a
45756 window. */
45758 static bool
45759 is_end_basic_block (enum dispatch_group group)
45761 return group == disp_branch;
45764 /* This function is called when the end of a window processing is reached. */
45766 static void
45767 process_end_window (void)
45769 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45770 if (dispatch_window_list->next)
45772 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45773 gcc_assert (dispatch_window_list->window_size
45774 + dispatch_window_list1->window_size <= 48);
45775 init_window (1);
45777 init_window (0);
45780 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45781 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45782 for 48 bytes of instructions.  Note that these windows are not true dispatch
45783 windows; their sizes are not limited to DISPATCH_WINDOW_SIZE.  */
45785 static dispatch_windows *
45786 allocate_next_window (int window_num)
45788 if (window_num == 0)
45790 if (dispatch_window_list->next)
45791 init_window (1);
45792 init_window (0);
45793 return dispatch_window_list;
45796 dispatch_window_list->next = dispatch_window_list1;
45797 dispatch_window_list1->prev = dispatch_window_list;
45799 return dispatch_window_list1;
45802 /* Increment the number of immediate operands of an instruction. */
45804 static int
45805 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45807 if (*in_rtx == 0)
45808 return 0;
45810 switch ( GET_CODE (*in_rtx))
45812 case CONST:
45813 case SYMBOL_REF:
45814 case CONST_INT:
45815 (imm_values->imm)++;
45816 if (x86_64_immediate_operand (*in_rtx, SImode))
45817 (imm_values->imm32)++;
45818 else
45819 (imm_values->imm64)++;
45820 break;
45822 case CONST_DOUBLE:
45823 (imm_values->imm)++;
45824 (imm_values->imm64)++;
45825 break;
45827 case CODE_LABEL:
45828 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45830 (imm_values->imm)++;
45831 (imm_values->imm32)++;
45833 break;
45835 default:
45836 break;
45839 return 0;
45842 /* Compute number of immediate operands of an instruction. */
45844 static void
45845 find_constant (rtx in_rtx, imm_info *imm_values)
45847 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45848 (rtx_function) find_constant_1, (void *) imm_values);
45851 /* Return total size of immediate operands of an instruction along with number
45852 of corresponding immediate operands.  It initializes its parameters to zero
45853 before calling FIND_CONSTANT.
45854 INSN is the input instruction. IMM is the total of immediates.
45855 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45856 bit immediates. */
45858 static int
45859 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45861 imm_info imm_values = {0, 0, 0};
45863 find_constant (insn, &imm_values);
45864 *imm = imm_values.imm;
45865 *imm32 = imm_values.imm32;
45866 *imm64 = imm_values.imm64;
45867 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45870 /* This function indicates whether an instruction has any immediate
45871 operands.  */
45873 static bool
45874 has_immediate (rtx insn)
45876 int num_imm_operand;
45877 int num_imm32_operand;
45878 int num_imm64_operand;
45880 if (insn)
45881 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45882 &num_imm64_operand);
45883 return false;
45886 /* Return single or double path for instructions. */
45888 static enum insn_path
45889 get_insn_path (rtx insn)
45891 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
45893 if ((int)path == 0)
45894 return path_single;
45896 if ((int)path == 1)
45897 return path_double;
45899 return path_multi;
45902 /* Return insn dispatch group. */
45904 static enum dispatch_group
45905 get_insn_group (rtx insn)
45907 enum dispatch_group group = get_mem_group (insn);
45908 if (group)
45909 return group;
45911 if (is_branch (insn))
45912 return disp_branch;
45914 if (is_cmp (insn))
45915 return disp_cmp;
45917 if (has_immediate (insn))
45918 return disp_imm;
45920 if (is_prefetch (insn))
45921 return disp_prefetch;
45923 return disp_no_group;
45926 /* Count number of GROUP restricted instructions in a dispatch
45927 window WINDOW_LIST. */
45929 static int
45930 count_num_restricted (rtx insn, dispatch_windows *window_list)
45932 enum dispatch_group group = get_insn_group (insn);
45933 int imm_size;
45934 int num_imm_operand;
45935 int num_imm32_operand;
45936 int num_imm64_operand;
45938 if (group == disp_no_group)
45939 return 0;
45941 if (group == disp_imm)
45943 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45944 &num_imm64_operand);
45945 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
45946 || num_imm_operand + window_list->num_imm > MAX_IMM
45947 || (num_imm32_operand > 0
45948 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
45949 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
45950 || (num_imm64_operand > 0
45951 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
45952 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
45953 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
45954 && num_imm64_operand > 0
45955 && ((window_list->num_imm_64 > 0
45956 && window_list->num_insn >= 2)
45957 || window_list->num_insn >= 3)))
45958 return BIG;
45960 return 1;
45963 if ((group == disp_load_store
45964 && (window_list->num_loads >= MAX_LOAD
45965 || window_list->num_stores >= MAX_STORE))
45966 || ((group == disp_load
45967 || group == disp_prefetch)
45968 && window_list->num_loads >= MAX_LOAD)
45969 || (group == disp_store
45970 && window_list->num_stores >= MAX_STORE))
45971 return BIG;
45973 return 1;
45976 /* This function returns true if insn satisfies dispatch rules on the
45977 last window scheduled. */
45979 static bool
45980 fits_dispatch_window (rtx insn)
45982 dispatch_windows *window_list = dispatch_window_list;
45983 dispatch_windows *window_list_next = dispatch_window_list->next;
45984 unsigned int num_restrict;
45985 enum dispatch_group group = get_insn_group (insn);
45986 enum insn_path path = get_insn_path (insn);
45987 int sum;
45989 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
45990 instructions should be given the lowest priority in the
45991 scheduling process in the Haifa scheduler to make sure they will be
45992 scheduled in the same dispatch window as the reference to them. */
45993 if (group == disp_jcc || group == disp_cmp)
45994 return false;
45996 /* Check nonrestricted. */
45997 if (group == disp_no_group || group == disp_branch)
45998 return true;
46000 /* Get last dispatch window. */
46001 if (window_list_next)
46002 window_list = window_list_next;
46004 if (window_list->window_num == 1)
46006 sum = window_list->prev->window_size + window_list->window_size;
46008 if (sum == 32
46009 || (min_insn_size (insn) + sum) >= 48)
46010 /* Window 1 is full. Go for next window. */
46011 return true;
46014 num_restrict = count_num_restricted (insn, window_list);
46016 if (num_restrict > num_allowable_groups[group])
46017 return false;
46019 /* See if it fits in the first window. */
46020 if (window_list->window_num == 0)
46022 /* The first window should have only single- and double-path
46023 uops. */
46024 if (path == path_double
46025 && (window_list->num_uops + 2) > MAX_INSN)
46026 return false;
46027 else if (path != path_single)
46028 return false;
46030 return true;
46033 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46034 dispatch window WINDOW_LIST. */
46036 static void
46037 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46039 int byte_len = min_insn_size (insn);
46040 int num_insn = window_list->num_insn;
46041 int imm_size;
46042 sched_insn_info *window = window_list->window;
46043 enum dispatch_group group = get_insn_group (insn);
46044 enum insn_path path = get_insn_path (insn);
46045 int num_imm_operand;
46046 int num_imm32_operand;
46047 int num_imm64_operand;
46049 if (!window_list->violation && group != disp_cmp
46050 && !fits_dispatch_window (insn))
46051 window_list->violation = true;
46053 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46054 &num_imm64_operand);
46056 /* Initialize window with new instruction. */
46057 window[num_insn].insn = insn;
46058 window[num_insn].byte_len = byte_len;
46059 window[num_insn].group = group;
46060 window[num_insn].path = path;
46061 window[num_insn].imm_bytes = imm_size;
46063 window_list->window_size += byte_len;
46064 window_list->num_insn = num_insn + 1;
46065 window_list->num_uops = window_list->num_uops + num_uops;
46066 window_list->imm_size += imm_size;
46067 window_list->num_imm += num_imm_operand;
46068 window_list->num_imm_32 += num_imm32_operand;
46069 window_list->num_imm_64 += num_imm64_operand;
46071 if (group == disp_store)
46072 window_list->num_stores += 1;
46073 else if (group == disp_load
46074 || group == disp_prefetch)
46075 window_list->num_loads += 1;
46076 else if (group == disp_load_store)
46078 window_list->num_stores += 1;
46079 window_list->num_loads += 1;
46083 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46084 If the total bytes of instructions or the number of instructions in
46085 the window exceeds the allowed maximum, a new window is allocated. */
46087 static void
46088 add_to_dispatch_window (rtx insn)
46090 int byte_len;
46091 dispatch_windows *window_list;
46092 dispatch_windows *next_list;
46093 dispatch_windows *window0_list;
46094 enum insn_path path;
46095 enum dispatch_group insn_group;
46096 bool insn_fits;
46097 int num_insn;
46098 int num_uops;
46099 int window_num;
46100 int insn_num_uops;
46101 int sum;
46103 if (INSN_CODE (insn) < 0)
46104 return;
46106 byte_len = min_insn_size (insn);
46107 window_list = dispatch_window_list;
46108 next_list = window_list->next;
46109 path = get_insn_path (insn);
46110 insn_group = get_insn_group (insn);
46112 /* Get the last dispatch window. */
46113 if (next_list)
46114 window_list = dispatch_window_list->next;
46116 if (path == path_single)
46117 insn_num_uops = 1;
46118 else if (path == path_double)
46119 insn_num_uops = 2;
46120 else
46121 insn_num_uops = (int) path;
46123 /* If the current window is full, get a new window.
46124 Window number zero is full if MAX_INSN uops are scheduled in it.
46125 Window number one is full if window zero's bytes plus window
46126 one's bytes equal 32, or if adding the bytes of the new
46127 instruction makes the total greater than 48, or if it already
46128 has MAX_INSN instructions in it. */
46129 num_insn = window_list->num_insn;
46130 num_uops = window_list->num_uops;
46131 window_num = window_list->window_num;
46132 insn_fits = fits_dispatch_window (insn);
46134 if (num_insn >= MAX_INSN
46135 || num_uops + insn_num_uops > MAX_INSN
46136 || !(insn_fits))
46138 window_num = ~window_num & 1;
46139 window_list = allocate_next_window (window_num);
46142 if (window_num == 0)
46144 add_insn_window (insn, window_list, insn_num_uops);
46145 if (window_list->num_insn >= MAX_INSN
46146 && insn_group == disp_branch)
46148 process_end_window ();
46149 return;
46152 else if (window_num == 1)
46154 window0_list = window_list->prev;
46155 sum = window0_list->window_size + window_list->window_size;
46156 if (sum == 32
46157 || (byte_len + sum) >= 48)
46159 process_end_window ();
46160 window_list = dispatch_window_list;
46163 add_insn_window (insn, window_list, insn_num_uops);
46165 else
46166 gcc_unreachable ();
46168 if (is_end_basic_block (insn_group))
46170 /* The end of the basic block is reached; do end-of-basic-block processing. */
46171 process_end_window ();
46172 return;
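/* A minimal example of the window-flip step above (not part of the
   original source): when the current window is full, the window index
   toggles between 0 and 1 with bit arithmetic:

     int window_num = 1;
     int next = ~window_num & 1;   // yields 0; for window_num == 0 it yields 1

   allocate_next_window is then called with the toggled index.  */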
46176 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46178 DEBUG_FUNCTION static void
46179 debug_dispatch_window_file (FILE *file, int window_num)
46181 dispatch_windows *list;
46182 int i;
46184 if (window_num == 0)
46185 list = dispatch_window_list;
46186 else
46187 list = dispatch_window_list1;
46189 fprintf (file, "Window #%d:\n", list->window_num);
46190 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46191 list->num_insn, list->num_uops, list->window_size);
46192 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46193 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46195 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46196 list->num_stores);
46197 fprintf (file, " insn info:\n");
46199 for (i = 0; i < MAX_INSN; i++)
46201 if (!list->window[i].insn)
46202 break;
46203 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46204 i, group_name[list->window[i].group],
46205 i, (void *)list->window[i].insn,
46206 i, list->window[i].path,
46207 i, list->window[i].byte_len,
46208 i, list->window[i].imm_bytes);
46212 /* Print to stdout a dispatch window. */
46214 DEBUG_FUNCTION void
46215 debug_dispatch_window (int window_num)
46217 debug_dispatch_window_file (stdout, window_num);
46220 /* Print INSN dispatch information to FILE. */
46222 DEBUG_FUNCTION static void
46223 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46225 int byte_len;
46226 enum insn_path path;
46227 enum dispatch_group group;
46228 int imm_size;
46229 int num_imm_operand;
46230 int num_imm32_operand;
46231 int num_imm64_operand;
46233 if (INSN_CODE (insn) < 0)
46234 return;
46236 byte_len = min_insn_size (insn);
46237 path = get_insn_path (insn);
46238 group = get_insn_group (insn);
46239 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46240 &num_imm64_operand);
46242 fprintf (file, " insn info:\n");
46243 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46244 group_name[group], path, byte_len);
46245 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46246 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46249 /* Print to STDOUT the status of the ready list with respect to
46250 dispatch windows. */
46252 DEBUG_FUNCTION void
46253 debug_ready_dispatch (void)
46255 int i;
46256 int no_ready = number_in_ready ();
46258 fprintf (stdout, "Number of ready: %d\n", no_ready);
46260 for (i = 0; i < no_ready; i++)
46261 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46264 /* This routine is the driver of the dispatch scheduler. */
46266 static void
46267 do_dispatch (rtx insn, int mode)
46269 if (mode == DISPATCH_INIT)
46270 init_dispatch_sched ();
46271 else if (mode == ADD_TO_DISPATCH_WINDOW)
46272 add_to_dispatch_window (insn);
46275 /* Return TRUE if Dispatch Scheduling is supported. */
46277 static bool
46278 has_dispatch (rtx insn, int action)
46280 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46281 && flag_dispatch_scheduler)
46282 switch (action)
46284 default:
46285 return false;
46287 case IS_DISPATCH_ON:
46288 return true;
46289 break;
46291 case IS_CMP:
46292 return is_cmp (insn);
46294 case DISPATCH_VIOLATION:
46295 return dispatch_violation ();
46297 case FITS_DISPATCH_WINDOW:
46298 return fits_dispatch_window (insn);
46301 return false;
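/* Dispatch scheduling is only active for the Bulldozer families
   (bdver1 through bdver4) and only when the user asks for it; the
   flag_dispatch_scheduler variable is assumed here to correspond to
   the -mdispatch-scheduler command-line option.  */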
46304 /* Implementation of the reassociation_width target hook, used by
46305 the reassoc phase to identify the level of parallelism in a
46306 reassociated tree. The statement's tree_code is passed in OPC;
46307 the type of the arguments is passed in MODE.
46309 Currently parallel reassociation is enabled only for Atom
46310 processors, and we set the reassociation width to 2
46311 because Atom may issue up to 2 instructions per cycle.
46313 The return value should be revisited if parallel reassociation is
46314 enabled for other processors. */
46316 static int
46317 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
46318 enum machine_mode mode)
46320 int res = 1;
46322 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46323 res = 2;
46324 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46325 res = 2;
46327 return res;
46330 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46331 place emms and femms instructions. */
46333 static enum machine_mode
46334 ix86_preferred_simd_mode (enum machine_mode mode)
46336 if (!TARGET_SSE)
46337 return word_mode;
46339 switch (mode)
46341 case QImode:
46342 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46343 case HImode:
46344 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46345 case SImode:
46346 return TARGET_AVX512F ? V16SImode :
46347 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46348 case DImode:
46349 return TARGET_AVX512F ? V8DImode :
46350 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46352 case SFmode:
46353 if (TARGET_AVX512F)
46354 return V16SFmode;
46355 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46356 return V8SFmode;
46357 else
46358 return V4SFmode;
46360 case DFmode:
46361 if (!TARGET_VECTORIZE_DOUBLE)
46362 return word_mode;
46363 else if (TARGET_AVX512F)
46364 return V8DFmode;
46365 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46366 return V4DFmode;
46367 else if (TARGET_SSE2)
46368 return V2DFmode;
46369 /* FALLTHRU */
46371 default:
46372 return word_mode;
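/* Illustrative mappings implied by the switch above (not part of the
   original source): with -mavx512f an SImode element prefers V16SImode
   (sixteen 32-bit lanes in a 512-bit register); with plain -mavx and
   no prefer-avx128 tuning, SFmode prefers V8SFmode; with only SSE2 and
   double vectorization enabled, DFmode prefers V2DFmode.  */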
46376 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46377 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46378 256bit and 128bit vectors. */
46380 static unsigned int
46381 ix86_autovectorize_vector_sizes (void)
46383 return TARGET_AVX512F ? 64 | 32 | 16 :
46384 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
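/* The returned value is a bit mask of vector sizes, in bytes, that the
   vectorizer should try.  For example (illustrative only), with
   AVX512F the mask is 64 | 32 | 16 == 112, i.e. 512-bit, 256-bit and
   128-bit vectors are all attempted; a zero mask means only the
   preferred SIMD mode is tried.  */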
46389 /* Return the class of registers that can be used to spill a pseudo of
46390 MODE and class RCLASS instead of memory. Return NO_REGS
46391 if this is not possible or not profitable. */
46392 static reg_class_t
46393 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46395 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46396 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46397 && INTEGER_CLASS_P (rclass))
46398 return ALL_SSE_REGS;
46399 return NO_REGS;
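/* Illustrative scenario (not part of the original source): when SSE is
   available, MMX is disabled and the general-regs-SSE-spill tuning is
   on, an SImode (or, on 64-bit targets, DImode) pseudo of an integer
   register class may be spilled into an SSE register instead of a
   stack slot, trading a memory access for a register-to-register
   move.  */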
46402 /* Implement targetm.vectorize.init_cost. */
46404 static void *
46405 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
46407 unsigned *cost = XNEWVEC (unsigned, 3);
46408 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46409 return cost;
46412 /* Implement targetm.vectorize.add_stmt_cost. */
46414 static unsigned
46415 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46416 struct _stmt_vec_info *stmt_info, int misalign,
46417 enum vect_cost_model_location where)
46419 unsigned *cost = (unsigned *) data;
46420 unsigned retval = 0;
46422 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46423 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46425 /* Statements in an inner loop relative to the loop being
46426 vectorized are weighted more heavily. The value here is
46427 arbitrary and could potentially be improved with analysis. */
46428 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46429 count *= 50; /* FIXME. */
46431 retval = (unsigned) (count * stmt_cost);
46432 cost[where] += retval;
46434 return retval;
46437 /* Implement targetm.vectorize.finish_cost. */
46439 static void
46440 ix86_finish_cost (void *data, unsigned *prologue_cost,
46441 unsigned *body_cost, unsigned *epilogue_cost)
46443 unsigned *cost = (unsigned *) data;
46444 *prologue_cost = cost[vect_prologue];
46445 *body_cost = cost[vect_body];
46446 *epilogue_cost = cost[vect_epilogue];
46449 /* Implement targetm.vectorize.destroy_cost_data. */
46451 static void
46452 ix86_destroy_cost_data (void *data)
46454 free (data);
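/* A minimal sketch of how the four vectorizer cost hooks above
   cooperate (not part of the original source; the counts are
   hypothetical):

     unsigned *c = (unsigned *) ix86_init_cost (NULL);       // {0, 0, 0}
     ix86_add_stmt_cost (c, 4, scalar_stmt, NULL, 0, vect_body);
     unsigned pro, body, epi;
     ix86_finish_cost (c, &pro, &body, &epi);                 // body == 4 * stmt_cost
     ix86_destroy_cost_data (c);

   The vectorizer compares the accumulated vector cost against the
   scalar cost to decide whether vectorization pays off.  */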
46457 /* Validate target specific memory model bits in VAL. */
46459 static unsigned HOST_WIDE_INT
46460 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46462 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46463 bool strong;
46465 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46466 |MEMMODEL_MASK)
46467 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46469 warning (OPT_Winvalid_memory_model,
46470 "Unknown architecture specific memory model");
46471 return MEMMODEL_SEQ_CST;
46473 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46474 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46476 warning (OPT_Winvalid_memory_model,
46477 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46478 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46480 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46482 warning (OPT_Winvalid_memory_model,
46483 "HLE_RELEASE not used with RELEASE or stronger memory model");
46484 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46486 return val;
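/* A hedged usage example (not part of the original source): the extra
   target bits validated above are the HLE hints that user code can OR
   into a standard memory model, e.g.

     while (__atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     ...
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining HLE_ACQUIRE with a model weaker than ACQUIRE, or
   HLE_RELEASE with one weaker than RELEASE, triggers the warnings
   above and falls back to SEQ_CST.  */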
46489 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46490 CLONEI->vecsize_float and, if CLONEI->simdlen is 0, also
46491 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46492 or the number of vecsize_mangle variants that should be emitted. */
46494 static int
46495 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46496 struct cgraph_simd_clone *clonei,
46497 tree base_type, int num)
46499 int ret = 1;
46501 if (clonei->simdlen
46502 && (clonei->simdlen < 2
46503 || clonei->simdlen > 16
46504 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46506 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46507 "unsupported simdlen %d", clonei->simdlen);
46508 return 0;
46511 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46512 if (TREE_CODE (ret_type) != VOID_TYPE)
46513 switch (TYPE_MODE (ret_type))
46515 case QImode:
46516 case HImode:
46517 case SImode:
46518 case DImode:
46519 case SFmode:
46520 case DFmode:
46521 /* case SCmode: */
46522 /* case DCmode: */
46523 break;
46524 default:
46525 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46526 "unsupported return type %qT for simd\n", ret_type);
46527 return 0;
46530 tree t;
46531 int i;
46533 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46534 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46535 switch (TYPE_MODE (TREE_TYPE (t)))
46537 case QImode:
46538 case HImode:
46539 case SImode:
46540 case DImode:
46541 case SFmode:
46542 case DFmode:
46543 /* case SCmode: */
46544 /* case DCmode: */
46545 break;
46546 default:
46547 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46548 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46549 return 0;
46552 if (clonei->cilk_elemental)
46554 /* The processor clause should be parsed here; if it is not present, default to 'b'. */
46555 clonei->vecsize_mangle = 'b';
46557 else if (!TREE_PUBLIC (node->decl))
46559 /* If the function isn't exported, we can pick up just one ISA
46560 for the clones. */
46561 if (TARGET_AVX2)
46562 clonei->vecsize_mangle = 'd';
46563 else if (TARGET_AVX)
46564 clonei->vecsize_mangle = 'c';
46565 else
46566 clonei->vecsize_mangle = 'b';
46567 ret = 1;
46569 else
46571 clonei->vecsize_mangle = "bcd"[num];
46572 ret = 3;
46574 switch (clonei->vecsize_mangle)
46576 case 'b':
46577 clonei->vecsize_int = 128;
46578 clonei->vecsize_float = 128;
46579 break;
46580 case 'c':
46581 clonei->vecsize_int = 128;
46582 clonei->vecsize_float = 256;
46583 break;
46584 case 'd':
46585 clonei->vecsize_int = 256;
46586 clonei->vecsize_float = 256;
46587 break;
46589 if (clonei->simdlen == 0)
46591 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46592 clonei->simdlen = clonei->vecsize_int;
46593 else
46594 clonei->simdlen = clonei->vecsize_float;
46595 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46596 if (clonei->simdlen > 16)
46597 clonei->simdlen = 16;
46599 return ret;
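/* A worked example under stated assumptions (not part of the original
   source): for an exported function declared as

     #pragma omp declare simd
     double f (double x);

   three clones are emitted, mangled 'b', 'c' and 'd'.  For the 'd'
   (AVX2) variant vecsize_float is 256, so with a 64-bit base type the
   default simdlen becomes 256 / 64 = 4 lanes; it would be capped at 16
   for narrower base types.  */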
46602 /* Add target attribute to SIMD clone NODE if needed. */
46604 static void
46605 ix86_simd_clone_adjust (struct cgraph_node *node)
46607 const char *str = NULL;
46608 gcc_assert (node->decl == cfun->decl);
46609 switch (node->simdclone->vecsize_mangle)
46611 case 'b':
46612 if (!TARGET_SSE2)
46613 str = "sse2";
46614 break;
46615 case 'c':
46616 if (!TARGET_AVX)
46617 str = "avx";
46618 break;
46619 case 'd':
46620 if (!TARGET_AVX2)
46621 str = "avx2";
46622 break;
46623 default:
46624 gcc_unreachable ();
46626 if (str == NULL)
46627 return;
46628 push_cfun (NULL);
46629 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46630 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46631 gcc_assert (ok);
46632 pop_cfun ();
46633 ix86_previous_fndecl = NULL_TREE;
46634 ix86_set_current_function (node->decl);
46637 /* If SIMD clone NODE can't be used in a vectorized loop
46638 in the current function, return -1; otherwise return the badness of using it
46639 (0 if it is most desirable from the vecsize_mangle point of view, 1
46640 slightly less desirable, etc.). */
46642 static int
46643 ix86_simd_clone_usable (struct cgraph_node *node)
46645 switch (node->simdclone->vecsize_mangle)
46647 case 'b':
46648 if (!TARGET_SSE2)
46649 return -1;
46650 if (!TARGET_AVX)
46651 return 0;
46652 return TARGET_AVX2 ? 2 : 1;
46653 case 'c':
46654 if (!TARGET_AVX)
46655 return -1;
46656 return TARGET_AVX2 ? 1 : 0;
46657 break;
46658 case 'd':
46659 if (!TARGET_AVX2)
46660 return -1;
46661 return 0;
46662 default:
46663 gcc_unreachable ();
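/* Illustrative ranking derived from the switch above (not part of the
   original source): on an AVX2 target all three clones are usable and
   their badness is 'd' -> 0, 'c' -> 1, 'b' -> 2, so the AVX2 clone is
   preferred; on an SSE2-only target the 'c' and 'd' clones return -1
   and only the 'b' clone can be used.  */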
46667 /* This function counts the number of memory references.
46668 The resulting value determines the unrolling factor for the
46669 bdver3 and bdver4 architectures. */
46671 static int
46672 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46674 if (*x != NULL_RTX && MEM_P (*x))
46676 enum machine_mode mode;
46677 unsigned int n_words;
46679 mode = GET_MODE (*x);
46680 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46682 if (n_words > 4)
46683 (*mem_count)+=2;
46684 else
46685 (*mem_count)+=1;
46687 return 0;
46690 /* This function adjusts the unroll factor based on
46691 the hardware capabilities. For example, bdver3 has
46692 a loop buffer which makes unrolling of smaller
46693 loops less important. This function decides the
46694 unroll factor using the number of memory references
46695 (with 32 as the threshold) as a heuristic. */
46697 static unsigned
46698 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46700 basic_block *bbs;
46701 rtx insn;
46702 unsigned i;
46703 unsigned mem_count = 0;
46705 if (!TARGET_ADJUST_UNROLL)
46706 return nunroll;
46708 /* Count the number of memory references within the loop body. */
46709 bbs = get_loop_body (loop);
46710 for (i = 0; i < loop->num_nodes; i++)
46712 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46713 if (NONDEBUG_INSN_P (insn))
46714 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46716 free (bbs);
46718 if (mem_count && mem_count <= 32)
46719 return 32 / mem_count;
46721 return nunroll;
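/* A worked example (not part of the original source; the counts are
   hypothetical): a loop body with 8 word-sized memory references gets
   an unroll factor of 32 / 8 = 4, whereas a loop with more than 32
   counted references keeps the factor NUNROLL chosen by the generic
   unroller.  References wider than four words count double, per
   ix86_loop_memcount above.  */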
46725 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46727 static bool
46728 ix86_float_exceptions_rounding_supported_p (void)
46730 /* For x87 floating point with standard excess precision handling,
46731 there is no adddf3 pattern (since x87 floating point only has
46732 XFmode operations) so the default hook implementation gets this
46733 wrong. */
46734 return TARGET_80387 || TARGET_SSE_MATH;
46737 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46739 static void
46740 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46742 if (!TARGET_80387 && !TARGET_SSE_MATH)
46743 return;
46744 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46745 if (TARGET_80387)
46747 tree fenv_index_type = build_index_type (size_int (6));
46748 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46749 tree fenv_var = create_tmp_var (fenv_type, NULL);
46750 mark_addressable (fenv_var);
46751 tree fenv_ptr = build_pointer_type (fenv_type);
46752 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46753 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46754 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46755 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46756 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46757 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46758 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46759 tree hold_fnclex = build_call_expr (fnclex, 0);
46760 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46761 hold_fnclex);
46762 *clear = build_call_expr (fnclex, 0);
46763 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46764 mark_addressable (sw_var);
46765 tree su_ptr = build_pointer_type (short_unsigned_type_node);
46766 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
46767 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
46768 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46769 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46770 exceptions_var, exceptions_x87);
46771 *update = build2 (COMPOUND_EXPR, integer_type_node,
46772 fnstsw_call, update_mod);
46773 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46774 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46776 if (TARGET_SSE_MATH)
46778 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46779 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46780 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46781 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46782 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46783 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46784 mxcsr_orig_var, stmxcsr_hold_call);
46785 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46786 mxcsr_orig_var,
46787 build_int_cst (unsigned_type_node, 0x1f80));
46788 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46789 build_int_cst (unsigned_type_node, 0xffffffc0));
46790 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46791 mxcsr_mod_var, hold_mod_val);
46792 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46793 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46794 hold_assign_orig, hold_assign_mod);
46795 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46796 ldmxcsr_hold_call);
46797 if (*hold)
46798 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46799 else
46800 *hold = hold_all;
46801 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46802 if (*clear)
46803 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46804 ldmxcsr_clear_call);
46805 else
46806 *clear = ldmxcsr_clear_call;
46807 tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
46808 tree exceptions_sse = fold_convert (integer_type_node,
46809 stmxcsr_update_call);
46810 if (*update)
46812 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46813 exceptions_var, exceptions_sse);
46814 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46815 exceptions_var, exceptions_mod);
46816 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46817 exceptions_assign);
46819 else
46820 *update = build2 (MODIFY_EXPR, integer_type_node,
46821 exceptions_var, exceptions_sse);
46822 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46823 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46824 ldmxcsr_update_call);
46826 tree atomic_feraiseexcept
46827 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46828 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46829 1, exceptions_var);
46830 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46831 atomic_feraiseexcept_call);
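/* A hedged usage sketch (not part of the original source): this hook
   backs C11 atomic compound assignment on floating-point operands,
   for example

     _Atomic double d;
     d += 1.0;

   The *HOLD sequence saves and clears the x87/SSE exception state
   before the compare-and-exchange loop, *CLEAR rearms the clean state
   when an iteration fails, and *UPDATE merges and re-raises the
   collected exceptions (via __atomic_feraiseexcept) once the store
   succeeds.  */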
46834 /* Initialize the GCC target structure. */
46835 #undef TARGET_RETURN_IN_MEMORY
46836 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46838 #undef TARGET_LEGITIMIZE_ADDRESS
46839 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46841 #undef TARGET_ATTRIBUTE_TABLE
46842 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46843 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46844 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46845 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46846 # undef TARGET_MERGE_DECL_ATTRIBUTES
46847 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46848 #endif
46850 #undef TARGET_COMP_TYPE_ATTRIBUTES
46851 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46853 #undef TARGET_INIT_BUILTINS
46854 #define TARGET_INIT_BUILTINS ix86_init_builtins
46855 #undef TARGET_BUILTIN_DECL
46856 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46857 #undef TARGET_EXPAND_BUILTIN
46858 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46860 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46861 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46862 ix86_builtin_vectorized_function
46864 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46865 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46867 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46868 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46870 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46871 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46873 #undef TARGET_BUILTIN_RECIPROCAL
46874 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
46876 #undef TARGET_ASM_FUNCTION_EPILOGUE
46877 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
46879 #undef TARGET_ENCODE_SECTION_INFO
46880 #ifndef SUBTARGET_ENCODE_SECTION_INFO
46881 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
46882 #else
46883 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
46884 #endif
46886 #undef TARGET_ASM_OPEN_PAREN
46887 #define TARGET_ASM_OPEN_PAREN ""
46888 #undef TARGET_ASM_CLOSE_PAREN
46889 #define TARGET_ASM_CLOSE_PAREN ""
46891 #undef TARGET_ASM_BYTE_OP
46892 #define TARGET_ASM_BYTE_OP ASM_BYTE
46894 #undef TARGET_ASM_ALIGNED_HI_OP
46895 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
46896 #undef TARGET_ASM_ALIGNED_SI_OP
46897 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
46898 #ifdef ASM_QUAD
46899 #undef TARGET_ASM_ALIGNED_DI_OP
46900 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
46901 #endif
46903 #undef TARGET_PROFILE_BEFORE_PROLOGUE
46904 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
46906 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
46907 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
46909 #undef TARGET_ASM_UNALIGNED_HI_OP
46910 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
46911 #undef TARGET_ASM_UNALIGNED_SI_OP
46912 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
46913 #undef TARGET_ASM_UNALIGNED_DI_OP
46914 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
46916 #undef TARGET_PRINT_OPERAND
46917 #define TARGET_PRINT_OPERAND ix86_print_operand
46918 #undef TARGET_PRINT_OPERAND_ADDRESS
46919 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
46920 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
46921 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
46922 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
46923 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
46925 #undef TARGET_SCHED_INIT_GLOBAL
46926 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
46927 #undef TARGET_SCHED_ADJUST_COST
46928 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
46929 #undef TARGET_SCHED_ISSUE_RATE
46930 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
46931 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
46932 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
46933 ia32_multipass_dfa_lookahead
46934 #undef TARGET_SCHED_MACRO_FUSION_P
46935 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
46936 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
46937 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
46939 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
46940 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
46942 #undef TARGET_MEMMODEL_CHECK
46943 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
46945 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
46946 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
46948 #ifdef HAVE_AS_TLS
46949 #undef TARGET_HAVE_TLS
46950 #define TARGET_HAVE_TLS true
46951 #endif
46952 #undef TARGET_CANNOT_FORCE_CONST_MEM
46953 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
46954 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
46955 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
46957 #undef TARGET_DELEGITIMIZE_ADDRESS
46958 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
46960 #undef TARGET_MS_BITFIELD_LAYOUT_P
46961 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
46963 #if TARGET_MACHO
46964 #undef TARGET_BINDS_LOCAL_P
46965 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
46966 #endif
46967 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46968 #undef TARGET_BINDS_LOCAL_P
46969 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
46970 #endif
46972 #undef TARGET_ASM_OUTPUT_MI_THUNK
46973 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
46974 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
46975 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
46977 #undef TARGET_ASM_FILE_START
46978 #define TARGET_ASM_FILE_START x86_file_start
46980 #undef TARGET_OPTION_OVERRIDE
46981 #define TARGET_OPTION_OVERRIDE ix86_option_override
46983 #undef TARGET_REGISTER_MOVE_COST
46984 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
46985 #undef TARGET_MEMORY_MOVE_COST
46986 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
46987 #undef TARGET_RTX_COSTS
46988 #define TARGET_RTX_COSTS ix86_rtx_costs
46989 #undef TARGET_ADDRESS_COST
46990 #define TARGET_ADDRESS_COST ix86_address_cost
46992 #undef TARGET_FIXED_CONDITION_CODE_REGS
46993 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
46994 #undef TARGET_CC_MODES_COMPATIBLE
46995 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
46997 #undef TARGET_MACHINE_DEPENDENT_REORG
46998 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47000 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47001 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47003 #undef TARGET_BUILD_BUILTIN_VA_LIST
47004 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47006 #undef TARGET_FOLD_BUILTIN
47007 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47009 #undef TARGET_COMPARE_VERSION_PRIORITY
47010 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47012 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47013 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47014 ix86_generate_version_dispatcher_body
47016 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47017 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47018 ix86_get_function_versions_dispatcher
47020 #undef TARGET_ENUM_VA_LIST_P
47021 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47023 #undef TARGET_FN_ABI_VA_LIST
47024 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47026 #undef TARGET_CANONICAL_VA_LIST_TYPE
47027 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47029 #undef TARGET_EXPAND_BUILTIN_VA_START
47030 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47032 #undef TARGET_MD_ASM_CLOBBERS
47033 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47035 #undef TARGET_PROMOTE_PROTOTYPES
47036 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47037 #undef TARGET_SETUP_INCOMING_VARARGS
47038 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47039 #undef TARGET_MUST_PASS_IN_STACK
47040 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47041 #undef TARGET_FUNCTION_ARG_ADVANCE
47042 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47043 #undef TARGET_FUNCTION_ARG
47044 #define TARGET_FUNCTION_ARG ix86_function_arg
47045 #undef TARGET_FUNCTION_ARG_BOUNDARY
47046 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47047 #undef TARGET_PASS_BY_REFERENCE
47048 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47049 #undef TARGET_INTERNAL_ARG_POINTER
47050 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47051 #undef TARGET_UPDATE_STACK_BOUNDARY
47052 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47053 #undef TARGET_GET_DRAP_RTX
47054 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47055 #undef TARGET_STRICT_ARGUMENT_NAMING
47056 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47057 #undef TARGET_STATIC_CHAIN
47058 #define TARGET_STATIC_CHAIN ix86_static_chain
47059 #undef TARGET_TRAMPOLINE_INIT
47060 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47061 #undef TARGET_RETURN_POPS_ARGS
47062 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47064 #undef TARGET_LEGITIMATE_COMBINED_INSN
47065 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47067 #undef TARGET_ASAN_SHADOW_OFFSET
47068 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47070 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47071 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47073 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47074 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47076 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47077 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47079 #undef TARGET_C_MODE_FOR_SUFFIX
47080 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47082 #ifdef HAVE_AS_TLS
47083 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47084 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47085 #endif
47087 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47088 #undef TARGET_INSERT_ATTRIBUTES
47089 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47090 #endif
47092 #undef TARGET_MANGLE_TYPE
47093 #define TARGET_MANGLE_TYPE ix86_mangle_type
47095 #if !TARGET_MACHO
47096 #undef TARGET_STACK_PROTECT_FAIL
47097 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47098 #endif
47100 #undef TARGET_FUNCTION_VALUE
47101 #define TARGET_FUNCTION_VALUE ix86_function_value
47103 #undef TARGET_FUNCTION_VALUE_REGNO_P
47104 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47106 #undef TARGET_PROMOTE_FUNCTION_MODE
47107 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47109 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47110 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47112 #undef TARGET_INSTANTIATE_DECLS
47113 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47115 #undef TARGET_SECONDARY_RELOAD
47116 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47118 #undef TARGET_CLASS_MAX_NREGS
47119 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47121 #undef TARGET_PREFERRED_RELOAD_CLASS
47122 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47123 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47124 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47125 #undef TARGET_CLASS_LIKELY_SPILLED_P
47126 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47128 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47129 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47130 ix86_builtin_vectorization_cost
47131 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47132 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47133 ix86_vectorize_vec_perm_const_ok
47134 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47135 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47136 ix86_preferred_simd_mode
47137 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47138 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47139 ix86_autovectorize_vector_sizes
47140 #undef TARGET_VECTORIZE_INIT_COST
47141 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47142 #undef TARGET_VECTORIZE_ADD_STMT_COST
47143 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47144 #undef TARGET_VECTORIZE_FINISH_COST
47145 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47146 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47147 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47149 #undef TARGET_SET_CURRENT_FUNCTION
47150 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47152 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47153 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47155 #undef TARGET_OPTION_SAVE
47156 #define TARGET_OPTION_SAVE ix86_function_specific_save
47158 #undef TARGET_OPTION_RESTORE
47159 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47161 #undef TARGET_OPTION_PRINT
47162 #define TARGET_OPTION_PRINT ix86_function_specific_print
47164 #undef TARGET_OPTION_FUNCTION_VERSIONS
47165 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47167 #undef TARGET_CAN_INLINE_P
47168 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47170 #undef TARGET_EXPAND_TO_RTL_HOOK
47171 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47173 #undef TARGET_LEGITIMATE_ADDRESS_P
47174 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47176 #undef TARGET_LRA_P
47177 #define TARGET_LRA_P hook_bool_void_true
47179 #undef TARGET_REGISTER_PRIORITY
47180 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47182 #undef TARGET_REGISTER_USAGE_LEVELING_P
47183 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47185 #undef TARGET_LEGITIMATE_CONSTANT_P
47186 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47188 #undef TARGET_FRAME_POINTER_REQUIRED
47189 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47191 #undef TARGET_CAN_ELIMINATE
47192 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47194 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47195 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47197 #undef TARGET_ASM_CODE_END
47198 #define TARGET_ASM_CODE_END ix86_code_end
47200 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47201 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47203 #if TARGET_MACHO
47204 #undef TARGET_INIT_LIBFUNCS
47205 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47206 #endif
47208 #undef TARGET_LOOP_UNROLL_ADJUST
47209 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47211 #undef TARGET_SPILL_CLASS
47212 #define TARGET_SPILL_CLASS ix86_spill_class
47214 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47215 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47216 ix86_simd_clone_compute_vecsize_and_simdlen
47218 #undef TARGET_SIMD_CLONE_ADJUST
47219 #define TARGET_SIMD_CLONE_ADJUST \
47220 ix86_simd_clone_adjust
47222 #undef TARGET_SIMD_CLONE_USABLE
47223 #define TARGET_SIMD_CLONE_USABLE \
47224 ix86_simd_clone_usable
47226 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47227 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47228 ix86_float_exceptions_rounding_supported_p
47230 struct gcc_target targetm = TARGET_INITIALIZER;
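/* Each #undef/#define pair above overrides one target hook; hooks that
   are not redefined keep the defaults supplied by target-def.h.
   TARGET_INITIALIZER expands to an aggregate initializer collecting
   every hook macro into this single targetm structure, which the
   middle end then consults, e.g. targetm.vectorize.preferred_simd_mode
   when picking a vector mode.  */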
47232 #include "gt-i386.h"