1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "diagnostic-core.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "gimple.h"
51 #include "dwarf2.h"
52 #include "df.h"
53 #include "tm-constrs.h"
54 #include "params.h"
55 #include "cselib.h"
56 #include "debug.h"
57 #include "dwarf2out.h"
58 #include "sched-int.h"
60 enum upper_128bits_state
62 unknown = 0, /* Unknown. */
63 unused, /* Not used or not referenced. */
64 used /* Used or referenced. */
67 typedef struct block_info_def
69 /* State of the upper 128bits of any AVX registers at exit. */
70 enum upper_128bits_state state;
 71 /* Whether the upper 128bits of any AVX registers are referenced. */
72 enum upper_128bits_state referenced;
73 /* Number of vzerouppers in this block. */
74 unsigned int count;
75 /* TRUE if block has been processed. */
76 bool processed;
77 /* TRUE if block has been rescanned. */
78 bool rescanned;
79 } *block_info;
81 #define BLOCK_INFO(B) ((block_info) (B)->aux)
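/* BLOCK_INFO reinterprets the generic per-basic-block AUX pointer;
   move_or_delete_vzeroupper below sets it up with
   alloc_aux_for_blocks (sizeof (struct block_info_def)) and releases it
   with free_aux_for_blocks.  An access such as

     BLOCK_INFO (bb)->processed

   therefore expands to ((block_info) bb->aux)->processed.  */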
83 enum call_avx256_state
85 /* Callee returns 256bit AVX register. */
86 callee_return_avx256 = -1,
87 /* Callee returns and passes 256bit AVX register. */
88 callee_return_pass_avx256,
89 /* Callee passes 256bit AVX register. */
90 callee_pass_avx256,
 91 /* Callee doesn't return nor pass a 256bit AVX register, or no
 92 256bit AVX register is used in the function return. */
93 call_no_avx256,
94 /* vzeroupper intrinsic. */
95 vzeroupper_intrinsic
 98 /* Check if a 256bit AVX register is referenced as the destination or the source of a store. */
100 static void
101 check_avx256_stores (rtx dest, const_rtx set, void *data)
103 if ((REG_P (dest)
104 && VALID_AVX256_REG_MODE (GET_MODE (dest)))
105 || (GET_CODE (set) == SET
106 && REG_P (SET_SRC (set))
107 && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
109 enum upper_128bits_state *state
110 = (enum upper_128bits_state *) data;
111 *state = used;
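/* check_avx256_stores is only meant to be used as a note_stores callback;
   the basic-block scan below invokes it roughly as

     enum upper_128bits_state state = unknown;
     note_stores (PATTERN (insn), check_avx256_stores, &state);

   and then tests whether STATE has become `used'.  */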
115 /* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
116 in basic block BB. Delete it if upper 128bit AVX registers are
117 unused. If it isn't deleted, move it to just before a jump insn.
 119 STATE is the state of the upper 128bits of AVX registers at entry;
 120 it is `used' if they may be live there. */
122 static void
123 move_or_delete_vzeroupper_2 (basic_block bb,
124 enum upper_128bits_state state)
126 rtx insn, bb_end;
127 rtx vzeroupper_insn = NULL_RTX;
128 rtx pat;
129 int avx256;
130 enum upper_128bits_state referenced = BLOCK_INFO (bb)->referenced;
131 int count = BLOCK_INFO (bb)->count;
133 if (dump_file)
134 fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
135 bb->index, state);
 137 /* BB_END changes when the insn at the end of the block is deleted; snapshot it first. */
138 bb_end = BB_END (bb);
139 insn = BB_HEAD (bb);
140 while (insn != bb_end)
142 insn = NEXT_INSN (insn);
144 if (!NONDEBUG_INSN_P (insn))
145 continue;
147 /* Move vzeroupper before jump/call. */
148 if (JUMP_P (insn) || CALL_P (insn))
150 if (!vzeroupper_insn)
151 continue;
153 if (PREV_INSN (insn) != vzeroupper_insn)
155 if (dump_file)
157 fprintf (dump_file, "Move vzeroupper after:\n");
158 print_rtl_single (dump_file, PREV_INSN (insn));
159 fprintf (dump_file, "before:\n");
160 print_rtl_single (dump_file, insn);
162 reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
163 PREV_INSN (insn));
165 vzeroupper_insn = NULL_RTX;
166 continue;
169 pat = PATTERN (insn);
171 /* Check insn for vzeroupper intrinsic. */
172 if (GET_CODE (pat) == UNSPEC_VOLATILE
173 && XINT (pat, 1) == UNSPECV_VZEROUPPER)
175 if (dump_file)
177 /* Found vzeroupper intrinsic. */
178 fprintf (dump_file, "Found vzeroupper:\n");
179 print_rtl_single (dump_file, insn);
182 else
184 /* Check insn for vzeroall intrinsic. */
185 if (GET_CODE (pat) == PARALLEL
186 && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
187 && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
189 state = unused;
191 /* Delete pending vzeroupper insertion. */
192 if (vzeroupper_insn)
194 count--;
195 delete_insn (vzeroupper_insn);
196 vzeroupper_insn = NULL_RTX;
199 else if (state != used && referenced != unused)
201 /* No need to call note_stores if the upper 128bits of
202 AVX registers are never referenced. */
203 note_stores (pat, check_avx256_stores, &state);
204 if (state == used)
205 referenced = used;
207 continue;
210 /* Process vzeroupper intrinsic. */
211 count++;
212 avx256 = INTVAL (XVECEXP (pat, 0, 0));
214 if (state == unused)
216 /* Since the upper 128bits are cleared, callee must not pass
217 256bit AVX register. We only need to check if callee
218 returns 256bit AVX register. */
219 if (avx256 == callee_return_avx256)
220 state = used;
222 /* Remove unnecessary vzeroupper since upper 128bits are
223 cleared. */
224 if (dump_file)
226 fprintf (dump_file, "Delete redundant vzeroupper:\n");
227 print_rtl_single (dump_file, insn);
229 count--;
230 delete_insn (insn);
232 else
234 /* Set state to UNUSED if callee doesn't return 256bit AVX
235 register. */
236 if (avx256 != callee_return_pass_avx256)
237 state = unused;
239 if (avx256 == callee_return_pass_avx256
240 || avx256 == callee_pass_avx256)
242 /* Must remove vzeroupper since callee passes in 256bit
243 AVX register. */
244 if (dump_file)
246 fprintf (dump_file, "Delete callee pass vzeroupper:\n");
247 print_rtl_single (dump_file, insn);
249 count--;
250 delete_insn (insn);
252 else
253 vzeroupper_insn = insn;
257 BLOCK_INFO (bb)->state = state;
259 if (BLOCK_INFO (bb)->referenced == unknown)
261 /* The upper 128bits of AVX registers are never referenced if
262 REFERENCED isn't updated. */
263 if (referenced == unknown)
264 referenced = unused;
265 BLOCK_INFO (bb)->referenced = referenced;
266 BLOCK_INFO (bb)->count = count;
269 if (dump_file)
270 fprintf (dump_file, " [bb %i] exit: upper 128bits: %d\n",
271 bb->index, state);
274 /* Helper function for move_or_delete_vzeroupper. Process vzeroupper
275 in BLOCK and its predecessor blocks recursively. */
277 static void
278 move_or_delete_vzeroupper_1 (basic_block block)
280 edge e;
281 edge_iterator ei;
282 enum upper_128bits_state state;
284 if (dump_file)
285 fprintf (dump_file, " Process [bb %i]: status: %d\n",
286 block->index, BLOCK_INFO (block)->processed);
288 if (BLOCK_INFO (block)->processed)
289 return;
291 BLOCK_INFO (block)->processed = true;
293 state = unknown;
295 /* Process all predecessor edges of this block. */
296 FOR_EACH_EDGE (e, ei, block->preds)
298 if (e->src == block)
299 continue;
300 move_or_delete_vzeroupper_1 (e->src);
301 switch (BLOCK_INFO (e->src)->state)
303 case unknown:
304 if (state == unused)
305 state = unknown;
306 break;
307 case used:
308 state = used;
309 break;
310 case unused:
311 break;
 315 /* If the state of any predecessor is still unknown, we need to rescan. */
316 if (state == unknown)
317 cfun->machine->rescan_vzeroupper_p = 1;
319 /* Process this block. */
320 move_or_delete_vzeroupper_2 (block, state);
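/* A predecessor may still be in the `unknown' state at the merge above,
   typically because the CFG contains a cycle and the depth-first walk
   reaches a block before all of its predecessors have been processed.
   Such blocks are handled by the rescan pass below, which treats
   `unknown' as `unused'.  */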
323 /* Helper function for move_or_delete_vzeroupper. Rescan vzeroupper
324 in BLOCK and its predecessor blocks recursively. */
326 static void
327 rescan_move_or_delete_vzeroupper (basic_block block)
329 edge e;
330 edge_iterator ei;
331 enum upper_128bits_state state;
333 if (dump_file)
334 fprintf (dump_file, " Rescan [bb %i]: status: %d\n",
335 block->index, BLOCK_INFO (block)->rescanned);
337 if (BLOCK_INFO (block)->rescanned)
338 return;
340 BLOCK_INFO (block)->rescanned = true;
342 state = unused;
344 /* Rescan all predecessor edges of this block. */
345 FOR_EACH_EDGE (e, ei, block->preds)
347 if (e->src == block)
348 continue;
349 rescan_move_or_delete_vzeroupper (e->src);
 350 /* For rescan, the UNKNOWN state is treated as UNUSED. */
351 if (BLOCK_INFO (e->src)->state == used)
352 state = used;
355 /* Rescan this block only if there are vzerouppers or the upper
356 128bits of AVX registers are referenced. */
357 if (BLOCK_INFO (block)->count == 0
358 && (state == used || BLOCK_INFO (block)->referenced != used))
360 if (state == used)
361 BLOCK_INFO (block)->state = state;
362 if (dump_file)
363 fprintf (dump_file, " [bb %i] exit: upper 128bits: %d\n",
364 block->index, BLOCK_INFO (block)->state);
366 else
367 move_or_delete_vzeroupper_2 (block, state);
370 /* Go through the instruction stream looking for vzeroupper. Delete
371 it if upper 128bit AVX registers are unused. If it isn't deleted,
372 move it to just before a jump insn. */
374 static void
375 move_or_delete_vzeroupper (void)
377 edge e;
378 edge_iterator ei;
379 basic_block bb;
380 unsigned int count = 0;
382 /* Set up block info for each basic block. */
383 alloc_aux_for_blocks (sizeof (struct block_info_def));
385 /* Process successor blocks of all entry points. */
386 if (dump_file)
387 fprintf (dump_file, "Process all entry points\n");
389 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
391 move_or_delete_vzeroupper_2 (e->dest,
392 cfun->machine->caller_pass_avx256_p
393 ? used : unused);
394 BLOCK_INFO (e->dest)->processed = true;
395 BLOCK_INFO (e->dest)->rescanned = true;
398 /* Process all basic blocks. */
399 if (dump_file)
400 fprintf (dump_file, "Process all basic blocks\n");
402 FOR_EACH_BB (bb)
404 move_or_delete_vzeroupper_1 (bb);
405 count += BLOCK_INFO (bb)->count;
408 /* Rescan all basic blocks if needed. */
409 if (count && cfun->machine->rescan_vzeroupper_p)
411 if (dump_file)
412 fprintf (dump_file, "Rescan all basic blocks\n");
414 FOR_EACH_BB (bb)
415 rescan_move_or_delete_vzeroupper (bb);
418 free_aux_for_blocks ();
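/* In outline, the pass above proceeds in three steps:

   1. Seed every successor of the entry block as `used' or `unused',
      depending on cfun->machine->caller_pass_avx256_p.
   2. Visit all blocks with move_or_delete_vzeroupper_1, merging the exit
      states of their predecessors and scanning each block with
      move_or_delete_vzeroupper_2 to delete redundant vzeroupper insns or
      sink the surviving ones to just before a jump or call.
   3. If any merge saw an `unknown' predecessor and some vzeroupper
      survived, run rescan_move_or_delete_vzeroupper over all blocks,
      treating `unknown' as `unused'.  */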
421 static rtx legitimize_dllimport_symbol (rtx, bool);
423 #ifndef CHECK_STACK_LIMIT
424 #define CHECK_STACK_LIMIT (-1)
425 #endif
427 /* Return index of given mode in mult and division cost tables. */
428 #define MODE_INDEX(mode) \
429 ((mode) == QImode ? 0 \
430 : (mode) == HImode ? 1 \
431 : (mode) == SImode ? 2 \
432 : (mode) == DImode ? 3 \
433 : 4)
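/* For example, MODE_INDEX (SImode) is 2 and selects the SI entry of the
   five-element multiply and divide cost arrays in the processor_costs
   tables below; any mode other than QI/HI/SI/DI falls into the trailing
   "other" slot at index 4.  */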
435 /* Processor costs (relative to an add) */
436 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
437 #define COSTS_N_BYTES(N) ((N) * 2)
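/* Under those assumptions one addition costs COSTS_N_BYTES (2)
   == COSTS_N_INSNS (1) == 4, so e.g. a 3-byte instruction is charged
   COSTS_N_BYTES (3) == 6, i.e. one and a half additions, when tuning
   for size.  */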
439 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
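/* Each processor_costs entry below contains two such stringop descriptor
   pairs, one for memcpy and one for memset; within each pair the first
   element appears to cover 32-bit and the second 64-bit code generation.
   A descriptor of the form

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   presumably reads: use a libcall for blocks of unknown size, rep with
   4-byte operands for known sizes up to 256 bytes, and (the -1
   terminator) a libcall for anything larger.  DUMMY_STRINGOP_ALGS is the
   placeholder used where one of the two variants is not interesting for
   the CPU being tuned.  */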
441 const
442 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
443 COSTS_N_BYTES (2), /* cost of an add instruction */
444 COSTS_N_BYTES (3), /* cost of a lea instruction */
445 COSTS_N_BYTES (2), /* variable shift costs */
446 COSTS_N_BYTES (3), /* constant shift costs */
447 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
448 COSTS_N_BYTES (3), /* HI */
449 COSTS_N_BYTES (3), /* SI */
450 COSTS_N_BYTES (3), /* DI */
451 COSTS_N_BYTES (5)}, /* other */
452 0, /* cost of multiply per each bit set */
453 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
454 COSTS_N_BYTES (3), /* HI */
455 COSTS_N_BYTES (3), /* SI */
456 COSTS_N_BYTES (3), /* DI */
457 COSTS_N_BYTES (5)}, /* other */
458 COSTS_N_BYTES (3), /* cost of movsx */
459 COSTS_N_BYTES (3), /* cost of movzx */
460 0, /* "large" insn */
461 2, /* MOVE_RATIO */
462 2, /* cost for loading QImode using movzbl */
463 {2, 2, 2}, /* cost of loading integer registers
464 in QImode, HImode and SImode.
465 Relative to reg-reg move (2). */
466 {2, 2, 2}, /* cost of storing integer registers */
467 2, /* cost of reg,reg fld/fst */
468 {2, 2, 2}, /* cost of loading fp registers
469 in SFmode, DFmode and XFmode */
470 {2, 2, 2}, /* cost of storing fp registers
471 in SFmode, DFmode and XFmode */
472 3, /* cost of moving MMX register */
473 {3, 3}, /* cost of loading MMX registers
474 in SImode and DImode */
475 {3, 3}, /* cost of storing MMX registers
476 in SImode and DImode */
477 3, /* cost of moving SSE register */
478 {3, 3, 3}, /* cost of loading SSE registers
479 in SImode, DImode and TImode */
480 {3, 3, 3}, /* cost of storing SSE registers
481 in SImode, DImode and TImode */
482 3, /* MMX or SSE register to integer */
483 0, /* size of l1 cache */
484 0, /* size of l2 cache */
485 0, /* size of prefetch block */
486 0, /* number of parallel prefetches */
487 2, /* Branch cost */
488 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
489 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
490 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
491 COSTS_N_BYTES (2), /* cost of FABS instruction. */
492 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
493 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
494 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
495 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
496 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
497 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
498 1, /* scalar_stmt_cost. */
499 1, /* scalar load_cost. */
500 1, /* scalar_store_cost. */
501 1, /* vec_stmt_cost. */
502 1, /* vec_to_scalar_cost. */
503 1, /* scalar_to_vec_cost. */
504 1, /* vec_align_load_cost. */
505 1, /* vec_unalign_load_cost. */
506 1, /* vec_store_cost. */
507 1, /* cond_taken_branch_cost. */
508 1, /* cond_not_taken_branch_cost. */
511 /* Processor costs (relative to an add) */
512 static const
513 struct processor_costs i386_cost = { /* 386 specific costs */
514 COSTS_N_INSNS (1), /* cost of an add instruction */
515 COSTS_N_INSNS (1), /* cost of a lea instruction */
516 COSTS_N_INSNS (3), /* variable shift costs */
517 COSTS_N_INSNS (2), /* constant shift costs */
518 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
519 COSTS_N_INSNS (6), /* HI */
520 COSTS_N_INSNS (6), /* SI */
521 COSTS_N_INSNS (6), /* DI */
522 COSTS_N_INSNS (6)}, /* other */
523 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
524 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
525 COSTS_N_INSNS (23), /* HI */
526 COSTS_N_INSNS (23), /* SI */
527 COSTS_N_INSNS (23), /* DI */
528 COSTS_N_INSNS (23)}, /* other */
529 COSTS_N_INSNS (3), /* cost of movsx */
530 COSTS_N_INSNS (2), /* cost of movzx */
531 15, /* "large" insn */
532 3, /* MOVE_RATIO */
533 4, /* cost for loading QImode using movzbl */
534 {2, 4, 2}, /* cost of loading integer registers
535 in QImode, HImode and SImode.
536 Relative to reg-reg move (2). */
537 {2, 4, 2}, /* cost of storing integer registers */
538 2, /* cost of reg,reg fld/fst */
539 {8, 8, 8}, /* cost of loading fp registers
540 in SFmode, DFmode and XFmode */
541 {8, 8, 8}, /* cost of storing fp registers
542 in SFmode, DFmode and XFmode */
543 2, /* cost of moving MMX register */
544 {4, 8}, /* cost of loading MMX registers
545 in SImode and DImode */
546 {4, 8}, /* cost of storing MMX registers
547 in SImode and DImode */
548 2, /* cost of moving SSE register */
549 {4, 8, 16}, /* cost of loading SSE registers
550 in SImode, DImode and TImode */
551 {4, 8, 16}, /* cost of storing SSE registers
552 in SImode, DImode and TImode */
553 3, /* MMX or SSE register to integer */
554 0, /* size of l1 cache */
555 0, /* size of l2 cache */
556 0, /* size of prefetch block */
557 0, /* number of parallel prefetches */
558 1, /* Branch cost */
559 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (22), /* cost of FABS instruction. */
563 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
565 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
566 DUMMY_STRINGOP_ALGS},
567 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
582 static const
583 struct processor_costs i486_cost = { /* 486 specific costs */
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (1), /* cost of a lea instruction */
586 COSTS_N_INSNS (3), /* variable shift costs */
587 COSTS_N_INSNS (2), /* constant shift costs */
588 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (12), /* HI */
590 COSTS_N_INSNS (12), /* SI */
591 COSTS_N_INSNS (12), /* DI */
592 COSTS_N_INSNS (12)}, /* other */
593 1, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (40), /* HI */
596 COSTS_N_INSNS (40), /* SI */
597 COSTS_N_INSNS (40), /* DI */
598 COSTS_N_INSNS (40)}, /* other */
599 COSTS_N_INSNS (3), /* cost of movsx */
600 COSTS_N_INSNS (2), /* cost of movzx */
601 15, /* "large" insn */
602 3, /* MOVE_RATIO */
603 4, /* cost for loading QImode using movzbl */
604 {2, 4, 2}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {2, 4, 2}, /* cost of storing integer registers */
608 2, /* cost of reg,reg fld/fst */
609 {8, 8, 8}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {8, 8, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 8}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 8}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 8, 16}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 8, 16}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 3, /* MMX or SSE register to integer */
624 4, /* size of l1 cache. 486 has 8kB cache
625 shared for code and data, so 4kB is
626 not really precise. */
627 4, /* size of l2 cache */
628 0, /* size of prefetch block */
629 0, /* number of parallel prefetches */
630 1, /* Branch cost */
631 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
632 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
633 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
634 COSTS_N_INSNS (3), /* cost of FABS instruction. */
635 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
636 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
637 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
638 DUMMY_STRINGOP_ALGS},
639 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
640 DUMMY_STRINGOP_ALGS},
641 1, /* scalar_stmt_cost. */
642 1, /* scalar load_cost. */
643 1, /* scalar_store_cost. */
644 1, /* vec_stmt_cost. */
645 1, /* vec_to_scalar_cost. */
646 1, /* scalar_to_vec_cost. */
647 1, /* vec_align_load_cost. */
648 2, /* vec_unalign_load_cost. */
649 1, /* vec_store_cost. */
650 3, /* cond_taken_branch_cost. */
651 1, /* cond_not_taken_branch_cost. */
654 static const
655 struct processor_costs pentium_cost = {
656 COSTS_N_INSNS (1), /* cost of an add instruction */
657 COSTS_N_INSNS (1), /* cost of a lea instruction */
658 COSTS_N_INSNS (4), /* variable shift costs */
659 COSTS_N_INSNS (1), /* constant shift costs */
660 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
661 COSTS_N_INSNS (11), /* HI */
662 COSTS_N_INSNS (11), /* SI */
663 COSTS_N_INSNS (11), /* DI */
664 COSTS_N_INSNS (11)}, /* other */
665 0, /* cost of multiply per each bit set */
666 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
667 COSTS_N_INSNS (25), /* HI */
668 COSTS_N_INSNS (25), /* SI */
669 COSTS_N_INSNS (25), /* DI */
670 COSTS_N_INSNS (25)}, /* other */
671 COSTS_N_INSNS (3), /* cost of movsx */
672 COSTS_N_INSNS (2), /* cost of movzx */
673 8, /* "large" insn */
674 6, /* MOVE_RATIO */
675 6, /* cost for loading QImode using movzbl */
676 {2, 4, 2}, /* cost of loading integer registers
677 in QImode, HImode and SImode.
678 Relative to reg-reg move (2). */
679 {2, 4, 2}, /* cost of storing integer registers */
680 2, /* cost of reg,reg fld/fst */
681 {2, 2, 6}, /* cost of loading fp registers
682 in SFmode, DFmode and XFmode */
683 {4, 4, 6}, /* cost of storing fp registers
684 in SFmode, DFmode and XFmode */
685 8, /* cost of moving MMX register */
686 {8, 8}, /* cost of loading MMX registers
687 in SImode and DImode */
688 {8, 8}, /* cost of storing MMX registers
689 in SImode and DImode */
690 2, /* cost of moving SSE register */
691 {4, 8, 16}, /* cost of loading SSE registers
692 in SImode, DImode and TImode */
693 {4, 8, 16}, /* cost of storing SSE registers
694 in SImode, DImode and TImode */
695 3, /* MMX or SSE register to integer */
696 8, /* size of l1 cache. */
697 8, /* size of l2 cache */
698 0, /* size of prefetch block */
699 0, /* number of parallel prefetches */
700 2, /* Branch cost */
701 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (1), /* cost of FABS instruction. */
705 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
707 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
708 DUMMY_STRINGOP_ALGS},
709 {{libcall, {{-1, rep_prefix_4_byte}}},
710 DUMMY_STRINGOP_ALGS},
711 1, /* scalar_stmt_cost. */
712 1, /* scalar load_cost. */
713 1, /* scalar_store_cost. */
714 1, /* vec_stmt_cost. */
715 1, /* vec_to_scalar_cost. */
716 1, /* scalar_to_vec_cost. */
717 1, /* vec_align_load_cost. */
718 2, /* vec_unalign_load_cost. */
719 1, /* vec_store_cost. */
720 3, /* cond_taken_branch_cost. */
721 1, /* cond_not_taken_branch_cost. */
724 static const
725 struct processor_costs pentiumpro_cost = {
726 COSTS_N_INSNS (1), /* cost of an add instruction */
727 COSTS_N_INSNS (1), /* cost of a lea instruction */
728 COSTS_N_INSNS (1), /* variable shift costs */
729 COSTS_N_INSNS (1), /* constant shift costs */
730 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
731 COSTS_N_INSNS (4), /* HI */
732 COSTS_N_INSNS (4), /* SI */
733 COSTS_N_INSNS (4), /* DI */
734 COSTS_N_INSNS (4)}, /* other */
735 0, /* cost of multiply per each bit set */
736 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
737 COSTS_N_INSNS (17), /* HI */
738 COSTS_N_INSNS (17), /* SI */
739 COSTS_N_INSNS (17), /* DI */
740 COSTS_N_INSNS (17)}, /* other */
741 COSTS_N_INSNS (1), /* cost of movsx */
742 COSTS_N_INSNS (1), /* cost of movzx */
743 8, /* "large" insn */
744 6, /* MOVE_RATIO */
745 2, /* cost for loading QImode using movzbl */
746 {4, 4, 4}, /* cost of loading integer registers
747 in QImode, HImode and SImode.
748 Relative to reg-reg move (2). */
749 {2, 2, 2}, /* cost of storing integer registers */
750 2, /* cost of reg,reg fld/fst */
751 {2, 2, 6}, /* cost of loading fp registers
752 in SFmode, DFmode and XFmode */
753 {4, 4, 6}, /* cost of storing fp registers
754 in SFmode, DFmode and XFmode */
755 2, /* cost of moving MMX register */
756 {2, 2}, /* cost of loading MMX registers
757 in SImode and DImode */
758 {2, 2}, /* cost of storing MMX registers
759 in SImode and DImode */
760 2, /* cost of moving SSE register */
761 {2, 2, 8}, /* cost of loading SSE registers
762 in SImode, DImode and TImode */
763 {2, 2, 8}, /* cost of storing SSE registers
764 in SImode, DImode and TImode */
765 3, /* MMX or SSE register to integer */
766 8, /* size of l1 cache. */
767 256, /* size of l2 cache */
768 32, /* size of prefetch block */
769 6, /* number of parallel prefetches */
770 2, /* Branch cost */
771 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
772 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
773 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
774 COSTS_N_INSNS (2), /* cost of FABS instruction. */
775 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
776 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
777 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
 778 (we ensure the alignment). For small blocks an inline loop is still a
 779 noticeable win; for bigger blocks either rep movsl or rep movsb is
 780 the way to go. Rep movsb apparently has a more expensive startup time
 781 in the CPU, but after 4K the difference is down in the noise. */
782 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
783 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
784 DUMMY_STRINGOP_ALGS},
785 {{rep_prefix_4_byte, {{1024, unrolled_loop},
786 {8192, rep_prefix_4_byte}, {-1, libcall}}},
787 DUMMY_STRINGOP_ALGS},
788 1, /* scalar_stmt_cost. */
789 1, /* scalar load_cost. */
790 1, /* scalar_store_cost. */
791 1, /* vec_stmt_cost. */
792 1, /* vec_to_scalar_cost. */
793 1, /* scalar_to_vec_cost. */
794 1, /* vec_align_load_cost. */
795 2, /* vec_unalign_load_cost. */
796 1, /* vec_store_cost. */
797 3, /* cond_taken_branch_cost. */
798 1, /* cond_not_taken_branch_cost. */
801 static const
802 struct processor_costs geode_cost = {
803 COSTS_N_INSNS (1), /* cost of an add instruction */
804 COSTS_N_INSNS (1), /* cost of a lea instruction */
805 COSTS_N_INSNS (2), /* variable shift costs */
806 COSTS_N_INSNS (1), /* constant shift costs */
807 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
808 COSTS_N_INSNS (4), /* HI */
809 COSTS_N_INSNS (7), /* SI */
810 COSTS_N_INSNS (7), /* DI */
811 COSTS_N_INSNS (7)}, /* other */
812 0, /* cost of multiply per each bit set */
813 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
814 COSTS_N_INSNS (23), /* HI */
815 COSTS_N_INSNS (39), /* SI */
816 COSTS_N_INSNS (39), /* DI */
817 COSTS_N_INSNS (39)}, /* other */
818 COSTS_N_INSNS (1), /* cost of movsx */
819 COSTS_N_INSNS (1), /* cost of movzx */
820 8, /* "large" insn */
821 4, /* MOVE_RATIO */
822 1, /* cost for loading QImode using movzbl */
823 {1, 1, 1}, /* cost of loading integer registers
824 in QImode, HImode and SImode.
825 Relative to reg-reg move (2). */
826 {1, 1, 1}, /* cost of storing integer registers */
827 1, /* cost of reg,reg fld/fst */
828 {1, 1, 1}, /* cost of loading fp registers
829 in SFmode, DFmode and XFmode */
830 {4, 6, 6}, /* cost of storing fp registers
831 in SFmode, DFmode and XFmode */
833 1, /* cost of moving MMX register */
834 {1, 1}, /* cost of loading MMX registers
835 in SImode and DImode */
836 {1, 1}, /* cost of storing MMX registers
837 in SImode and DImode */
838 1, /* cost of moving SSE register */
839 {1, 1, 1}, /* cost of loading SSE registers
840 in SImode, DImode and TImode */
841 {1, 1, 1}, /* cost of storing SSE registers
842 in SImode, DImode and TImode */
843 1, /* MMX or SSE register to integer */
844 64, /* size of l1 cache. */
845 128, /* size of l2 cache. */
846 32, /* size of prefetch block */
847 1, /* number of parallel prefetches */
848 1, /* Branch cost */
849 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
850 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
851 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
852 COSTS_N_INSNS (1), /* cost of FABS instruction. */
853 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
854 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
855 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
856 DUMMY_STRINGOP_ALGS},
857 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
858 DUMMY_STRINGOP_ALGS},
859 1, /* scalar_stmt_cost. */
860 1, /* scalar load_cost. */
861 1, /* scalar_store_cost. */
862 1, /* vec_stmt_cost. */
863 1, /* vec_to_scalar_cost. */
864 1, /* scalar_to_vec_cost. */
865 1, /* vec_align_load_cost. */
866 2, /* vec_unalign_load_cost. */
867 1, /* vec_store_cost. */
868 3, /* cond_taken_branch_cost. */
869 1, /* cond_not_taken_branch_cost. */
872 static const
873 struct processor_costs k6_cost = {
874 COSTS_N_INSNS (1), /* cost of an add instruction */
875 COSTS_N_INSNS (2), /* cost of a lea instruction */
876 COSTS_N_INSNS (1), /* variable shift costs */
877 COSTS_N_INSNS (1), /* constant shift costs */
878 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
879 COSTS_N_INSNS (3), /* HI */
880 COSTS_N_INSNS (3), /* SI */
881 COSTS_N_INSNS (3), /* DI */
882 COSTS_N_INSNS (3)}, /* other */
883 0, /* cost of multiply per each bit set */
884 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
885 COSTS_N_INSNS (18), /* HI */
886 COSTS_N_INSNS (18), /* SI */
887 COSTS_N_INSNS (18), /* DI */
888 COSTS_N_INSNS (18)}, /* other */
889 COSTS_N_INSNS (2), /* cost of movsx */
890 COSTS_N_INSNS (2), /* cost of movzx */
891 8, /* "large" insn */
892 4, /* MOVE_RATIO */
893 3, /* cost for loading QImode using movzbl */
894 {4, 5, 4}, /* cost of loading integer registers
895 in QImode, HImode and SImode.
896 Relative to reg-reg move (2). */
897 {2, 3, 2}, /* cost of storing integer registers */
898 4, /* cost of reg,reg fld/fst */
899 {6, 6, 6}, /* cost of loading fp registers
900 in SFmode, DFmode and XFmode */
901 {4, 4, 4}, /* cost of storing fp registers
902 in SFmode, DFmode and XFmode */
903 2, /* cost of moving MMX register */
904 {2, 2}, /* cost of loading MMX registers
905 in SImode and DImode */
906 {2, 2}, /* cost of storing MMX registers
907 in SImode and DImode */
908 2, /* cost of moving SSE register */
909 {2, 2, 8}, /* cost of loading SSE registers
910 in SImode, DImode and TImode */
911 {2, 2, 8}, /* cost of storing SSE registers
912 in SImode, DImode and TImode */
913 6, /* MMX or SSE register to integer */
914 32, /* size of l1 cache. */
915 32, /* size of l2 cache. Some models
916 have integrated l2 cache, but
917 optimizing for k6 is not important
918 enough to worry about that. */
919 32, /* size of prefetch block */
920 1, /* number of parallel prefetches */
921 1, /* Branch cost */
922 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
923 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
924 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
925 COSTS_N_INSNS (2), /* cost of FABS instruction. */
926 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
927 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
928 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
929 DUMMY_STRINGOP_ALGS},
930 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
931 DUMMY_STRINGOP_ALGS},
932 1, /* scalar_stmt_cost. */
933 1, /* scalar load_cost. */
934 1, /* scalar_store_cost. */
935 1, /* vec_stmt_cost. */
936 1, /* vec_to_scalar_cost. */
937 1, /* scalar_to_vec_cost. */
938 1, /* vec_align_load_cost. */
939 2, /* vec_unalign_load_cost. */
940 1, /* vec_store_cost. */
941 3, /* cond_taken_branch_cost. */
942 1, /* cond_not_taken_branch_cost. */
945 static const
946 struct processor_costs athlon_cost = {
947 COSTS_N_INSNS (1), /* cost of an add instruction */
948 COSTS_N_INSNS (2), /* cost of a lea instruction */
949 COSTS_N_INSNS (1), /* variable shift costs */
950 COSTS_N_INSNS (1), /* constant shift costs */
951 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
952 COSTS_N_INSNS (5), /* HI */
953 COSTS_N_INSNS (5), /* SI */
954 COSTS_N_INSNS (5), /* DI */
955 COSTS_N_INSNS (5)}, /* other */
956 0, /* cost of multiply per each bit set */
957 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
958 COSTS_N_INSNS (26), /* HI */
959 COSTS_N_INSNS (42), /* SI */
960 COSTS_N_INSNS (74), /* DI */
961 COSTS_N_INSNS (74)}, /* other */
962 COSTS_N_INSNS (1), /* cost of movsx */
963 COSTS_N_INSNS (1), /* cost of movzx */
964 8, /* "large" insn */
965 9, /* MOVE_RATIO */
966 4, /* cost for loading QImode using movzbl */
967 {3, 4, 3}, /* cost of loading integer registers
968 in QImode, HImode and SImode.
969 Relative to reg-reg move (2). */
970 {3, 4, 3}, /* cost of storing integer registers */
971 4, /* cost of reg,reg fld/fst */
972 {4, 4, 12}, /* cost of loading fp registers
973 in SFmode, DFmode and XFmode */
974 {6, 6, 8}, /* cost of storing fp registers
975 in SFmode, DFmode and XFmode */
976 2, /* cost of moving MMX register */
977 {4, 4}, /* cost of loading MMX registers
978 in SImode and DImode */
979 {4, 4}, /* cost of storing MMX registers
980 in SImode and DImode */
981 2, /* cost of moving SSE register */
982 {4, 4, 6}, /* cost of loading SSE registers
983 in SImode, DImode and TImode */
984 {4, 4, 5}, /* cost of storing SSE registers
985 in SImode, DImode and TImode */
986 5, /* MMX or SSE register to integer */
987 64, /* size of l1 cache. */
988 256, /* size of l2 cache. */
989 64, /* size of prefetch block */
990 6, /* number of parallel prefetches */
991 5, /* Branch cost */
992 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
993 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
994 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
995 COSTS_N_INSNS (2), /* cost of FABS instruction. */
996 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
997 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 998 /* For some reason, Athlon deals better with the REP prefix (relative to
 999 loops) than K8 does. Alignment becomes important after 8 bytes for
 1000 memcpy and 128 bytes for memset. */
1001 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1002 DUMMY_STRINGOP_ALGS},
1003 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1004 DUMMY_STRINGOP_ALGS},
1005 1, /* scalar_stmt_cost. */
1006 1, /* scalar load_cost. */
1007 1, /* scalar_store_cost. */
1008 1, /* vec_stmt_cost. */
1009 1, /* vec_to_scalar_cost. */
1010 1, /* scalar_to_vec_cost. */
1011 1, /* vec_align_load_cost. */
1012 2, /* vec_unalign_load_cost. */
1013 1, /* vec_store_cost. */
1014 3, /* cond_taken_branch_cost. */
1015 1, /* cond_not_taken_branch_cost. */
1018 static const
1019 struct processor_costs k8_cost = {
1020 COSTS_N_INSNS (1), /* cost of an add instruction */
1021 COSTS_N_INSNS (2), /* cost of a lea instruction */
1022 COSTS_N_INSNS (1), /* variable shift costs */
1023 COSTS_N_INSNS (1), /* constant shift costs */
1024 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1025 COSTS_N_INSNS (4), /* HI */
1026 COSTS_N_INSNS (3), /* SI */
1027 COSTS_N_INSNS (4), /* DI */
1028 COSTS_N_INSNS (5)}, /* other */
1029 0, /* cost of multiply per each bit set */
1030 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1031 COSTS_N_INSNS (26), /* HI */
1032 COSTS_N_INSNS (42), /* SI */
1033 COSTS_N_INSNS (74), /* DI */
1034 COSTS_N_INSNS (74)}, /* other */
1035 COSTS_N_INSNS (1), /* cost of movsx */
1036 COSTS_N_INSNS (1), /* cost of movzx */
1037 8, /* "large" insn */
1038 9, /* MOVE_RATIO */
1039 4, /* cost for loading QImode using movzbl */
1040 {3, 4, 3}, /* cost of loading integer registers
1041 in QImode, HImode and SImode.
1042 Relative to reg-reg move (2). */
1043 {3, 4, 3}, /* cost of storing integer registers */
1044 4, /* cost of reg,reg fld/fst */
1045 {4, 4, 12}, /* cost of loading fp registers
1046 in SFmode, DFmode and XFmode */
1047 {6, 6, 8}, /* cost of storing fp registers
1048 in SFmode, DFmode and XFmode */
1049 2, /* cost of moving MMX register */
1050 {3, 3}, /* cost of loading MMX registers
1051 in SImode and DImode */
1052 {4, 4}, /* cost of storing MMX registers
1053 in SImode and DImode */
1054 2, /* cost of moving SSE register */
1055 {4, 3, 6}, /* cost of loading SSE registers
1056 in SImode, DImode and TImode */
1057 {4, 4, 5}, /* cost of storing SSE registers
1058 in SImode, DImode and TImode */
1059 5, /* MMX or SSE register to integer */
1060 64, /* size of l1 cache. */
1061 512, /* size of l2 cache. */
1062 64, /* size of prefetch block */
1063 /* New AMD processors never drop prefetches; if they cannot be performed
1064 immediately, they are queued. We set number of simultaneous prefetches
1065 to a large constant to reflect this (it probably is not a good idea not
1066 to limit number of prefetches at all, as their execution also takes some
1067 time). */
1068 100, /* number of parallel prefetches */
1069 3, /* Branch cost */
1070 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1071 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1072 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1073 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1074 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1075 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 1076 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
 1077 small blocks it is better to use a loop. For large blocks, a libcall can
 1078 do nontemporal accesses and beat inline copying considerably. */
1079 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1080 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1081 {{libcall, {{8, loop}, {24, unrolled_loop},
1082 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1083 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1084 4, /* scalar_stmt_cost. */
1085 2, /* scalar load_cost. */
1086 2, /* scalar_store_cost. */
1087 5, /* vec_stmt_cost. */
1088 0, /* vec_to_scalar_cost. */
1089 2, /* scalar_to_vec_cost. */
1090 2, /* vec_align_load_cost. */
1091 3, /* vec_unalign_load_cost. */
1092 3, /* vec_store_cost. */
1093 3, /* cond_taken_branch_cost. */
1094 2, /* cond_not_taken_branch_cost. */
1097 struct processor_costs amdfam10_cost = {
1098 COSTS_N_INSNS (1), /* cost of an add instruction */
1099 COSTS_N_INSNS (2), /* cost of a lea instruction */
1100 COSTS_N_INSNS (1), /* variable shift costs */
1101 COSTS_N_INSNS (1), /* constant shift costs */
1102 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1103 COSTS_N_INSNS (4), /* HI */
1104 COSTS_N_INSNS (3), /* SI */
1105 COSTS_N_INSNS (4), /* DI */
1106 COSTS_N_INSNS (5)}, /* other */
1107 0, /* cost of multiply per each bit set */
1108 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1109 COSTS_N_INSNS (35), /* HI */
1110 COSTS_N_INSNS (51), /* SI */
1111 COSTS_N_INSNS (83), /* DI */
1112 COSTS_N_INSNS (83)}, /* other */
1113 COSTS_N_INSNS (1), /* cost of movsx */
1114 COSTS_N_INSNS (1), /* cost of movzx */
1115 8, /* "large" insn */
1116 9, /* MOVE_RATIO */
1117 4, /* cost for loading QImode using movzbl */
1118 {3, 4, 3}, /* cost of loading integer registers
1119 in QImode, HImode and SImode.
1120 Relative to reg-reg move (2). */
1121 {3, 4, 3}, /* cost of storing integer registers */
1122 4, /* cost of reg,reg fld/fst */
1123 {4, 4, 12}, /* cost of loading fp registers
1124 in SFmode, DFmode and XFmode */
1125 {6, 6, 8}, /* cost of storing fp registers
1126 in SFmode, DFmode and XFmode */
1127 2, /* cost of moving MMX register */
1128 {3, 3}, /* cost of loading MMX registers
1129 in SImode and DImode */
1130 {4, 4}, /* cost of storing MMX registers
1131 in SImode and DImode */
1132 2, /* cost of moving SSE register */
1133 {4, 4, 3}, /* cost of loading SSE registers
1134 in SImode, DImode and TImode */
1135 {4, 4, 5}, /* cost of storing SSE registers
1136 in SImode, DImode and TImode */
1137 3, /* MMX or SSE register to integer */
1138 /* On K8:
1139 MOVD reg64, xmmreg Double FSTORE 4
1140 MOVD reg32, xmmreg Double FSTORE 4
1141 On AMDFAM10:
1142 MOVD reg64, xmmreg Double FADD 3
1143 1/1 1/1
1144 MOVD reg32, xmmreg Double FADD 3
1145 1/1 1/1 */
1146 64, /* size of l1 cache. */
1147 512, /* size of l2 cache. */
1148 64, /* size of prefetch block */
1149 /* New AMD processors never drop prefetches; if they cannot be performed
1150 immediately, they are queued. We set number of simultaneous prefetches
1151 to a large constant to reflect this (it probably is not a good idea not
1152 to limit number of prefetches at all, as their execution also takes some
1153 time). */
1154 100, /* number of parallel prefetches */
1155 2, /* Branch cost */
1156 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1157 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1158 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1159 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1160 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1161 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 1163 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
 1164 very small blocks it is better to use a loop. For large blocks, a libcall can
 1165 do nontemporal accesses and beat inline copying considerably. */
1166 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1167 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1168 {{libcall, {{8, loop}, {24, unrolled_loop},
1169 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1170 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1171 4, /* scalar_stmt_cost. */
1172 2, /* scalar load_cost. */
1173 2, /* scalar_store_cost. */
1174 6, /* vec_stmt_cost. */
1175 0, /* vec_to_scalar_cost. */
1176 2, /* scalar_to_vec_cost. */
1177 2, /* vec_align_load_cost. */
1178 2, /* vec_unalign_load_cost. */
1179 2, /* vec_store_cost. */
1180 2, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1184 struct processor_costs bdver1_cost = {
1185 COSTS_N_INSNS (1), /* cost of an add instruction */
1186 COSTS_N_INSNS (1), /* cost of a lea instruction */
1187 COSTS_N_INSNS (1), /* variable shift costs */
1188 COSTS_N_INSNS (1), /* constant shift costs */
1189 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1190 COSTS_N_INSNS (4), /* HI */
1191 COSTS_N_INSNS (4), /* SI */
1192 COSTS_N_INSNS (6), /* DI */
1193 COSTS_N_INSNS (6)}, /* other */
1194 0, /* cost of multiply per each bit set */
1195 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1196 COSTS_N_INSNS (35), /* HI */
1197 COSTS_N_INSNS (51), /* SI */
1198 COSTS_N_INSNS (83), /* DI */
1199 COSTS_N_INSNS (83)}, /* other */
1200 COSTS_N_INSNS (1), /* cost of movsx */
1201 COSTS_N_INSNS (1), /* cost of movzx */
1202 8, /* "large" insn */
1203 9, /* MOVE_RATIO */
1204 4, /* cost for loading QImode using movzbl */
1205 {5, 5, 4}, /* cost of loading integer registers
1206 in QImode, HImode and SImode.
1207 Relative to reg-reg move (2). */
1208 {4, 4, 4}, /* cost of storing integer registers */
1209 2, /* cost of reg,reg fld/fst */
1210 {5, 5, 12}, /* cost of loading fp registers
1211 in SFmode, DFmode and XFmode */
1212 {4, 4, 8}, /* cost of storing fp registers
1213 in SFmode, DFmode and XFmode */
1214 2, /* cost of moving MMX register */
1215 {4, 4}, /* cost of loading MMX registers
1216 in SImode and DImode */
1217 {4, 4}, /* cost of storing MMX registers
1218 in SImode and DImode */
1219 2, /* cost of moving SSE register */
1220 {4, 4, 4}, /* cost of loading SSE registers
1221 in SImode, DImode and TImode */
1222 {4, 4, 4}, /* cost of storing SSE registers
1223 in SImode, DImode and TImode */
1224 2, /* MMX or SSE register to integer */
1225 /* On K8:
1226 MOVD reg64, xmmreg Double FSTORE 4
1227 MOVD reg32, xmmreg Double FSTORE 4
1228 On AMDFAM10:
1229 MOVD reg64, xmmreg Double FADD 3
1230 1/1 1/1
1231 MOVD reg32, xmmreg Double FADD 3
1232 1/1 1/1 */
1233 16, /* size of l1 cache. */
1234 2048, /* size of l2 cache. */
1235 64, /* size of prefetch block */
1236 /* New AMD processors never drop prefetches; if they cannot be performed
1237 immediately, they are queued. We set number of simultaneous prefetches
1238 to a large constant to reflect this (it probably is not a good idea not
1239 to limit number of prefetches at all, as their execution also takes some
1240 time). */
1241 100, /* number of parallel prefetches */
1242 2, /* Branch cost */
1243 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1244 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1245 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1246 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1247 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1248 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
 1250 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
 1251 very small blocks it is better to use a loop. For large blocks, a libcall
 1252 can do nontemporal accesses and beat inline copying considerably. */
1253 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1254 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1255 {{libcall, {{8, loop}, {24, unrolled_loop},
1256 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1257 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1258 6, /* scalar_stmt_cost. */
1259 4, /* scalar load_cost. */
1260 4, /* scalar_store_cost. */
1261 6, /* vec_stmt_cost. */
1262 0, /* vec_to_scalar_cost. */
1263 2, /* scalar_to_vec_cost. */
1264 4, /* vec_align_load_cost. */
1265 4, /* vec_unalign_load_cost. */
1266 4, /* vec_store_cost. */
1267 2, /* cond_taken_branch_cost. */
1268 1, /* cond_not_taken_branch_cost. */
1271 static const
1272 struct processor_costs pentium4_cost = {
1273 COSTS_N_INSNS (1), /* cost of an add instruction */
1274 COSTS_N_INSNS (3), /* cost of a lea instruction */
1275 COSTS_N_INSNS (4), /* variable shift costs */
1276 COSTS_N_INSNS (4), /* constant shift costs */
1277 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1278 COSTS_N_INSNS (15), /* HI */
1279 COSTS_N_INSNS (15), /* SI */
1280 COSTS_N_INSNS (15), /* DI */
1281 COSTS_N_INSNS (15)}, /* other */
1282 0, /* cost of multiply per each bit set */
1283 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1284 COSTS_N_INSNS (56), /* HI */
1285 COSTS_N_INSNS (56), /* SI */
1286 COSTS_N_INSNS (56), /* DI */
1287 COSTS_N_INSNS (56)}, /* other */
1288 COSTS_N_INSNS (1), /* cost of movsx */
1289 COSTS_N_INSNS (1), /* cost of movzx */
1290 16, /* "large" insn */
1291 6, /* MOVE_RATIO */
1292 2, /* cost for loading QImode using movzbl */
1293 {4, 5, 4}, /* cost of loading integer registers
1294 in QImode, HImode and SImode.
1295 Relative to reg-reg move (2). */
1296 {2, 3, 2}, /* cost of storing integer registers */
1297 2, /* cost of reg,reg fld/fst */
1298 {2, 2, 6}, /* cost of loading fp registers
1299 in SFmode, DFmode and XFmode */
1300 {4, 4, 6}, /* cost of storing fp registers
1301 in SFmode, DFmode and XFmode */
1302 2, /* cost of moving MMX register */
1303 {2, 2}, /* cost of loading MMX registers
1304 in SImode and DImode */
1305 {2, 2}, /* cost of storing MMX registers
1306 in SImode and DImode */
1307 12, /* cost of moving SSE register */
1308 {12, 12, 12}, /* cost of loading SSE registers
1309 in SImode, DImode and TImode */
1310 {2, 2, 8}, /* cost of storing SSE registers
1311 in SImode, DImode and TImode */
1312 10, /* MMX or SSE register to integer */
1313 8, /* size of l1 cache. */
1314 256, /* size of l2 cache. */
1315 64, /* size of prefetch block */
1316 6, /* number of parallel prefetches */
1317 2, /* Branch cost */
1318 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1319 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1320 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1321 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1322 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1323 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1324 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1325 DUMMY_STRINGOP_ALGS},
1326 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1327 {-1, libcall}}},
1328 DUMMY_STRINGOP_ALGS},
1329 1, /* scalar_stmt_cost. */
1330 1, /* scalar load_cost. */
1331 1, /* scalar_store_cost. */
1332 1, /* vec_stmt_cost. */
1333 1, /* vec_to_scalar_cost. */
1334 1, /* scalar_to_vec_cost. */
1335 1, /* vec_align_load_cost. */
1336 2, /* vec_unalign_load_cost. */
1337 1, /* vec_store_cost. */
1338 3, /* cond_taken_branch_cost. */
1339 1, /* cond_not_taken_branch_cost. */
1342 static const
1343 struct processor_costs nocona_cost = {
1344 COSTS_N_INSNS (1), /* cost of an add instruction */
1345 COSTS_N_INSNS (1), /* cost of a lea instruction */
1346 COSTS_N_INSNS (1), /* variable shift costs */
1347 COSTS_N_INSNS (1), /* constant shift costs */
1348 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1349 COSTS_N_INSNS (10), /* HI */
1350 COSTS_N_INSNS (10), /* SI */
1351 COSTS_N_INSNS (10), /* DI */
1352 COSTS_N_INSNS (10)}, /* other */
1353 0, /* cost of multiply per each bit set */
1354 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1355 COSTS_N_INSNS (66), /* HI */
1356 COSTS_N_INSNS (66), /* SI */
1357 COSTS_N_INSNS (66), /* DI */
1358 COSTS_N_INSNS (66)}, /* other */
1359 COSTS_N_INSNS (1), /* cost of movsx */
1360 COSTS_N_INSNS (1), /* cost of movzx */
1361 16, /* "large" insn */
1362 17, /* MOVE_RATIO */
1363 4, /* cost for loading QImode using movzbl */
1364 {4, 4, 4}, /* cost of loading integer registers
1365 in QImode, HImode and SImode.
1366 Relative to reg-reg move (2). */
1367 {4, 4, 4}, /* cost of storing integer registers */
1368 3, /* cost of reg,reg fld/fst */
1369 {12, 12, 12}, /* cost of loading fp registers
1370 in SFmode, DFmode and XFmode */
1371 {4, 4, 4}, /* cost of storing fp registers
1372 in SFmode, DFmode and XFmode */
1373 6, /* cost of moving MMX register */
1374 {12, 12}, /* cost of loading MMX registers
1375 in SImode and DImode */
1376 {12, 12}, /* cost of storing MMX registers
1377 in SImode and DImode */
1378 6, /* cost of moving SSE register */
1379 {12, 12, 12}, /* cost of loading SSE registers
1380 in SImode, DImode and TImode */
1381 {12, 12, 12}, /* cost of storing SSE registers
1382 in SImode, DImode and TImode */
1383 8, /* MMX or SSE register to integer */
1384 8, /* size of l1 cache. */
1385 1024, /* size of l2 cache. */
1386 128, /* size of prefetch block */
1387 8, /* number of parallel prefetches */
1388 1, /* Branch cost */
1389 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1390 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1391 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1392 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1393 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1394 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1395 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1396 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1397 {100000, unrolled_loop}, {-1, libcall}}}},
1398 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1399 {-1, libcall}}},
1400 {libcall, {{24, loop}, {64, unrolled_loop},
1401 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1402 1, /* scalar_stmt_cost. */
1403 1, /* scalar load_cost. */
1404 1, /* scalar_store_cost. */
1405 1, /* vec_stmt_cost. */
1406 1, /* vec_to_scalar_cost. */
1407 1, /* scalar_to_vec_cost. */
1408 1, /* vec_align_load_cost. */
1409 2, /* vec_unalign_load_cost. */
1410 1, /* vec_store_cost. */
1411 3, /* cond_taken_branch_cost. */
1412 1, /* cond_not_taken_branch_cost. */
1415 static const
1416 struct processor_costs atom_cost = {
1417 COSTS_N_INSNS (1), /* cost of an add instruction */
1418 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1419 COSTS_N_INSNS (1), /* variable shift costs */
1420 COSTS_N_INSNS (1), /* constant shift costs */
1421 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1422 COSTS_N_INSNS (4), /* HI */
1423 COSTS_N_INSNS (3), /* SI */
1424 COSTS_N_INSNS (4), /* DI */
1425 COSTS_N_INSNS (2)}, /* other */
1426 0, /* cost of multiply per each bit set */
1427 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1428 COSTS_N_INSNS (26), /* HI */
1429 COSTS_N_INSNS (42), /* SI */
1430 COSTS_N_INSNS (74), /* DI */
1431 COSTS_N_INSNS (74)}, /* other */
1432 COSTS_N_INSNS (1), /* cost of movsx */
1433 COSTS_N_INSNS (1), /* cost of movzx */
1434 8, /* "large" insn */
1435 17, /* MOVE_RATIO */
1436 2, /* cost for loading QImode using movzbl */
1437 {4, 4, 4}, /* cost of loading integer registers
1438 in QImode, HImode and SImode.
1439 Relative to reg-reg move (2). */
1440 {4, 4, 4}, /* cost of storing integer registers */
1441 4, /* cost of reg,reg fld/fst */
1442 {12, 12, 12}, /* cost of loading fp registers
1443 in SFmode, DFmode and XFmode */
1444 {6, 6, 8}, /* cost of storing fp registers
1445 in SFmode, DFmode and XFmode */
1446 2, /* cost of moving MMX register */
1447 {8, 8}, /* cost of loading MMX registers
1448 in SImode and DImode */
1449 {8, 8}, /* cost of storing MMX registers
1450 in SImode and DImode */
1451 2, /* cost of moving SSE register */
1452 {8, 8, 8}, /* cost of loading SSE registers
1453 in SImode, DImode and TImode */
1454 {8, 8, 8}, /* cost of storing SSE registers
1455 in SImode, DImode and TImode */
1456 5, /* MMX or SSE register to integer */
1457 32, /* size of l1 cache. */
1458 256, /* size of l2 cache. */
1459 64, /* size of prefetch block */
1460 6, /* number of parallel prefetches */
1461 3, /* Branch cost */
1462 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1463 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1464 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1465 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1466 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1467 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1468 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1469 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1470 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1471 {{libcall, {{8, loop}, {15, unrolled_loop},
1472 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1473 {libcall, {{24, loop}, {32, unrolled_loop},
1474 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1475 1, /* scalar_stmt_cost. */
1476 1, /* scalar load_cost. */
1477 1, /* scalar_store_cost. */
1478 1, /* vec_stmt_cost. */
1479 1, /* vec_to_scalar_cost. */
1480 1, /* scalar_to_vec_cost. */
1481 1, /* vec_align_load_cost. */
1482 2, /* vec_unalign_load_cost. */
1483 1, /* vec_store_cost. */
1484 3, /* cond_taken_branch_cost. */
1485 1, /* cond_not_taken_branch_cost. */
1488 /* Generic64 should produce code tuned for Nocona and K8. */
1489 static const
1490 struct processor_costs generic64_cost = {
1491 COSTS_N_INSNS (1), /* cost of an add instruction */
 1492 /* On all chips taken into consideration, lea is 2 cycles or more. With
 1493 this cost, however, our current implementation of synth_mult results in
 1494 the use of unnecessary temporary registers, causing regressions on several
 1495 SPECfp benchmarks. */
1496 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1497 COSTS_N_INSNS (1), /* variable shift costs */
1498 COSTS_N_INSNS (1), /* constant shift costs */
1499 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1500 COSTS_N_INSNS (4), /* HI */
1501 COSTS_N_INSNS (3), /* SI */
1502 COSTS_N_INSNS (4), /* DI */
1503 COSTS_N_INSNS (2)}, /* other */
1504 0, /* cost of multiply per each bit set */
1505 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1506 COSTS_N_INSNS (26), /* HI */
1507 COSTS_N_INSNS (42), /* SI */
1508 COSTS_N_INSNS (74), /* DI */
1509 COSTS_N_INSNS (74)}, /* other */
1510 COSTS_N_INSNS (1), /* cost of movsx */
1511 COSTS_N_INSNS (1), /* cost of movzx */
1512 8, /* "large" insn */
1513 17, /* MOVE_RATIO */
1514 4, /* cost for loading QImode using movzbl */
1515 {4, 4, 4}, /* cost of loading integer registers
1516 in QImode, HImode and SImode.
1517 Relative to reg-reg move (2). */
1518 {4, 4, 4}, /* cost of storing integer registers */
1519 4, /* cost of reg,reg fld/fst */
1520 {12, 12, 12}, /* cost of loading fp registers
1521 in SFmode, DFmode and XFmode */
1522 {6, 6, 8}, /* cost of storing fp registers
1523 in SFmode, DFmode and XFmode */
1524 2, /* cost of moving MMX register */
1525 {8, 8}, /* cost of loading MMX registers
1526 in SImode and DImode */
1527 {8, 8}, /* cost of storing MMX registers
1528 in SImode and DImode */
1529 2, /* cost of moving SSE register */
1530 {8, 8, 8}, /* cost of loading SSE registers
1531 in SImode, DImode and TImode */
1532 {8, 8, 8}, /* cost of storing SSE registers
1533 in SImode, DImode and TImode */
1534 5, /* MMX or SSE register to integer */
1535 32, /* size of l1 cache. */
1536 512, /* size of l2 cache. */
1537 64, /* size of prefetch block */
1538 6, /* number of parallel prefetches */
 1539 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
 1540 value is increased to the perhaps more appropriate value of 5. */
1541 3, /* Branch cost */
1542 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1543 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1544 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1545 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1546 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1547 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1548 {DUMMY_STRINGOP_ALGS,
1549 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1550 {DUMMY_STRINGOP_ALGS,
1551 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1552 1, /* scalar_stmt_cost. */
1553 1, /* scalar load_cost. */
1554 1, /* scalar_store_cost. */
1555 1, /* vec_stmt_cost. */
1556 1, /* vec_to_scalar_cost. */
1557 1, /* scalar_to_vec_cost. */
1558 1, /* vec_align_load_cost. */
1559 2, /* vec_unalign_load_cost. */
1560 1, /* vec_store_cost. */
1561 3, /* cond_taken_branch_cost. */
1562 1, /* cond_not_taken_branch_cost. */
1565 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1566 Athlon and K8. */
1567 static const
1568 struct processor_costs generic32_cost = {
1569 COSTS_N_INSNS (1), /* cost of an add instruction */
1570 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1571 COSTS_N_INSNS (1), /* variable shift costs */
1572 COSTS_N_INSNS (1), /* constant shift costs */
1573 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1574 COSTS_N_INSNS (4), /* HI */
1575 COSTS_N_INSNS (3), /* SI */
1576 COSTS_N_INSNS (4), /* DI */
1577 COSTS_N_INSNS (2)}, /* other */
1578 0, /* cost of multiply per each bit set */
1579 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1580 COSTS_N_INSNS (26), /* HI */
1581 COSTS_N_INSNS (42), /* SI */
1582 COSTS_N_INSNS (74), /* DI */
1583 COSTS_N_INSNS (74)}, /* other */
1584 COSTS_N_INSNS (1), /* cost of movsx */
1585 COSTS_N_INSNS (1), /* cost of movzx */
1586 8, /* "large" insn */
1587 17, /* MOVE_RATIO */
1588 4, /* cost for loading QImode using movzbl */
1589 {4, 4, 4}, /* cost of loading integer registers
1590 in QImode, HImode and SImode.
1591 Relative to reg-reg move (2). */
1592 {4, 4, 4}, /* cost of storing integer registers */
1593 4, /* cost of reg,reg fld/fst */
1594 {12, 12, 12}, /* cost of loading fp registers
1595 in SFmode, DFmode and XFmode */
1596 {6, 6, 8}, /* cost of storing fp registers
1597 in SFmode, DFmode and XFmode */
1598 2, /* cost of moving MMX register */
1599 {8, 8}, /* cost of loading MMX registers
1600 in SImode and DImode */
1601 {8, 8}, /* cost of storing MMX registers
1602 in SImode and DImode */
1603 2, /* cost of moving SSE register */
1604 {8, 8, 8}, /* cost of loading SSE registers
1605 in SImode, DImode and TImode */
1606 {8, 8, 8}, /* cost of storing SSE registers
1607 in SImode, DImode and TImode */
1608 5, /* MMX or SSE register to integer */
1609 32, /* size of l1 cache. */
1610 256, /* size of l2 cache. */
1611 64, /* size of prefetch block */
1612 6, /* number of parallel prefetches */
1613 3, /* Branch cost */
1614 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1615 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1616 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1617 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1618 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1619 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1620 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1621 DUMMY_STRINGOP_ALGS},
1622 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1623 DUMMY_STRINGOP_ALGS},
1624 1, /* scalar_stmt_cost. */
1625 1, /* scalar load_cost. */
1626 1, /* scalar_store_cost. */
1627 1, /* vec_stmt_cost. */
1628 1, /* vec_to_scalar_cost. */
1629 1, /* scalar_to_vec_cost. */
1630 1, /* vec_align_load_cost. */
1631 2, /* vec_unalign_load_cost. */
1632 1, /* vec_store_cost. */
1633 3, /* cond_taken_branch_cost. */
1634 1, /* cond_not_taken_branch_cost. */
1637 const struct processor_costs *ix86_cost = &pentium_cost;
1639 /* Processor feature/optimization bitmasks. */
1640 #define m_386 (1<<PROCESSOR_I386)
1641 #define m_486 (1<<PROCESSOR_I486)
1642 #define m_PENT (1<<PROCESSOR_PENTIUM)
1643 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1644 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1645 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1646 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1647 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1648 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1649 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1650 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1651 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1652 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1653 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1654 #define m_ATOM (1<<PROCESSOR_ATOM)
1656 #define m_GEODE (1<<PROCESSOR_GEODE)
1657 #define m_K6 (1<<PROCESSOR_K6)
1658 #define m_K6_GEODE (m_K6 | m_GEODE)
1659 #define m_K8 (1<<PROCESSOR_K8)
1660 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1661 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1662 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1663 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1664 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)
1666 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1667 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1669 /* Generic instruction choice should be a common subset of supported CPUs
1670 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1671 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1673 /* Feature tests against the various tunings. */
1674 unsigned char ix86_tune_features[X86_TUNE_LAST];
1676 /* Feature tests against the various tunings used to create ix86_tune_features
1677 based on the processor mask. */
1678 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1679 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1680 negatively, so enabling it for Generic64 seems like a good code-size
1681 tradeoff. We can't enable it for 32bit generic because it does not
1682 work well with PPro-based chips. */
1683 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
1685 /* X86_TUNE_PUSH_MEMORY */
1686 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1687 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1689 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1690 m_486 | m_PENT,
1692 /* X86_TUNE_UNROLL_STRLEN */
1693 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1694 | m_CORE2I7 | m_GENERIC,
1696 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1697 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1698 | m_CORE2I7 | m_GENERIC,
1700 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1701 on simulation results. But after the P4 was made, no performance benefit
1702 was observed with branch hints; they also increase the code size.
1703 As a result, icc never generates branch hints. */
1706 /* X86_TUNE_DOUBLE_WITH_ADD */
1707 ~m_386,
1709 /* X86_TUNE_USE_SAHF */
1710 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1711 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1713 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1714 partial dependencies. */
1715 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1716 | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1718 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1719 register stalls on the Generic32 compilation setting as well. However,
1720 in the current implementation partial register stalls are not eliminated
1721 very well - they can be introduced via subregs synthesized by combine
1722 and can happen in caller/callee-saving sequences. Because this option
1723 pays back little on PPro-based chips and conflicts with the partial reg
1724 dependencies used by Athlon/P4-based chips, it is better to leave it off
1725 for generic32 for now. */
1726 m_PPRO,
1728 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1729 m_CORE2I7 | m_GENERIC,
1731 /* X86_TUNE_USE_HIMODE_FIOP */
1732 m_386 | m_486 | m_K6_GEODE,
1734 /* X86_TUNE_USE_SIMODE_FIOP */
1735 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
1737 /* X86_TUNE_USE_MOV0 */
1738 m_K6,
1740 /* X86_TUNE_USE_CLTD */
1741 ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
1743 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1744 m_PENT4,
1746 /* X86_TUNE_SPLIT_LONG_MOVES */
1747 m_PPRO,
1749 /* X86_TUNE_READ_MODIFY_WRITE */
1750 ~m_PENT,
1752 /* X86_TUNE_READ_MODIFY */
1753 ~(m_PENT | m_PPRO),
1755 /* X86_TUNE_PROMOTE_QIMODE */
1756 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1757 | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
1759 /* X86_TUNE_FAST_PREFIX */
1760 ~(m_PENT | m_486 | m_386),
1762 /* X86_TUNE_SINGLE_STRINGOP */
1763 m_386 | m_PENT4 | m_NOCONA,
1765 /* X86_TUNE_QIMODE_MATH */
1768 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1769 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1770 might be considered for Generic32 if our scheme for avoiding partial
1771 stalls were more effective. */
1772 ~m_PPRO,
1774 /* X86_TUNE_PROMOTE_QI_REGS */
1777 /* X86_TUNE_PROMOTE_HI_REGS */
1778 m_PPRO,
1780 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1781 over esp addition. */
1782 m_386 | m_486 | m_PENT | m_PPRO,
1784 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1785 over esp addition. */
1786 m_PENT,
1788 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1789 over esp subtraction. */
1790 m_386 | m_486 | m_PENT | m_K6_GEODE,
1792 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1793 over esp subtraction. */
1794 m_PENT | m_K6_GEODE,
1796 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1797 for DFmode copies */
1798 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
1799 | m_GENERIC | m_GEODE),
1801 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1802 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1804 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1805 conflict here between PPro/Pentium4-based chips that treat 128bit
1806 SSE registers as single units and K8-based chips that split SSE
1807 registers into two 64bit halves. This knob promotes all store destinations
1808 to be 128bit to allow register renaming on 128bit SSE units, but usually
1809 results in one extra microop on 64bit SSE units. Experimental results
1810 show that disabling this option on the P4 brings over a 20% SPECfp regression,
1811 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1812 masked by careful scheduling of moves. */
1813 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC
1814 | m_AMDFAM10 | m_BDVER1,
1816 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1817 m_AMDFAM10 | m_BDVER1 | m_COREI7,
1819 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1820 m_BDVER1 | m_COREI7,
1822 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1823 m_BDVER1,
1825 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1826 are resolved on SSE register parts instead of whole registers, so we may
1827 maintain just the lower part of scalar values in the proper format, leaving the
1828 upper part undefined. */
1829 m_ATHLON_K8,
1831 /* X86_TUNE_SSE_TYPELESS_STORES */
1832 m_AMD_MULTIPLE,
1834 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1835 m_PPRO | m_PENT4 | m_NOCONA,
1837 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1838 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1840 /* X86_TUNE_PROLOGUE_USING_MOVE */
1841 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1843 /* X86_TUNE_EPILOGUE_USING_MOVE */
1844 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1846 /* X86_TUNE_SHIFT1 */
1847 ~m_486,
1849 /* X86_TUNE_USE_FFREEP */
1850 m_AMD_MULTIPLE,
1852 /* X86_TUNE_INTER_UNIT_MOVES */
1853 ~(m_AMD_MULTIPLE | m_GENERIC),
1855 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1856 ~(m_AMDFAM10 | m_BDVER1),
1858 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1859 than 4 branch instructions in a 16-byte window. */
1860 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
1861 | m_GENERIC,
1863 /* X86_TUNE_SCHEDULE */
1864 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
1865 | m_GENERIC,
1867 /* X86_TUNE_USE_BT */
1868 m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
1870 /* X86_TUNE_USE_INCDEC */
1871 ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
1873 /* X86_TUNE_PAD_RETURNS */
1874 m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
1876 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
1877 m_ATOM,
1879 /* X86_TUNE_EXT_80387_CONSTANTS */
1880 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1881 | m_CORE2I7 | m_GENERIC,
1883 /* X86_TUNE_SHORTEN_X87_SSE */
1884 ~m_K8,
1886 /* X86_TUNE_AVOID_VECTOR_DECODE */
1887 m_K8 | m_CORE2I7_64 | m_GENERIC64,
1889 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1890 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1891 ~(m_386 | m_486),
1893 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory takes
1894 the vector path on AMD machines. */
1895 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1897 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant takes the vector path
1898 on AMD machines. */
1899 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1901 /* X86_TUNE_MOVE_M1_VIA_OR: On Pentiums, it is faster to load -1 via OR
1902 than via MOV. */
1903 m_PENT,
1905 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1906 but one byte longer. */
1907 m_PENT,
1909 /* X86_TUNE_NOT_VECTORMODE: On the AMD K6, NOT is vector decoded with a memory
1910 operand that cannot be represented using a modRM byte. The XOR
1911 replacement is long decoded, so this split helps here as well. */
1912 m_K6,
1914 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1915 from FP to FP. */
1916 m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
1918 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1919 from integer to FP. */
1920 m_AMDFAM10,
1922 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1923 with a subsequent conditional jump instruction into a single
1924 compare-and-branch uop. */
1925 m_BDVER1,
1927 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1928 will impact LEA instruction selection. */
1929 m_ATOM,
1931 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
1932 instructions. */
1933 ~m_ATOM,
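#if 0
/* Illustrative sketch, not part of the original source: how the m_* masks in
   the table above are consumed.  Once ix86_tune has been chosen, the option
   override code later in this file expands the per-CPU bit into the boolean
   ix86_tune_features[] array, roughly like this:  */
{
  unsigned int ix86_tune_mask = 1u << ix86_tune;
  unsigned int i;

  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i]
      = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
}
#endif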
1936 /* Feature tests against the various architecture variations. */
1937 unsigned char ix86_arch_features[X86_ARCH_LAST];
1939 /* Feature tests against the various architecture variations, used to create
1940 ix86_arch_features based on the processor mask. */
1941 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1942 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1943 ~(m_386 | m_486 | m_PENT | m_K6),
1945 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1946 ~m_386,
1948 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1949 ~(m_386 | m_486),
1951 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1952 ~m_386,
1954 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1955 ~m_386,
1958 static const unsigned int x86_accumulate_outgoing_args
1959 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
1960 | m_GENERIC;
1962 static const unsigned int x86_arch_always_fancy_math_387
1963 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1964 | m_NOCONA | m_CORE2I7 | m_GENERIC;
1966 static enum stringop_alg stringop_alg = no_stringop;
1968 /* In case the average insn count for a single function invocation is
1969 lower than this constant, emit fast (but longer) prologue and
1970 epilogue code. */
1971 #define FAST_PROLOGUE_INSN_COUNT 20
1973 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1974 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1975 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1976 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1978 /* Array of the smallest class containing reg number REGNO, indexed by
1979 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1981 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1983 /* ax, dx, cx, bx */
1984 AREG, DREG, CREG, BREG,
1985 /* si, di, bp, sp */
1986 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1987 /* FP registers */
1988 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1989 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1990 /* arg pointer */
1991 NON_Q_REGS,
1992 /* flags, fpsr, fpcr, frame */
1993 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1994 /* SSE registers */
1995 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1996 SSE_REGS, SSE_REGS,
1997 /* MMX registers */
1998 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1999 MMX_REGS, MMX_REGS,
2000 /* REX registers */
2001 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2002 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2003 /* SSE REX registers */
2004 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2005 SSE_REGS, SSE_REGS,
2008 /* The "default" register map used in 32bit mode. */
2010 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2012 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2013 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2014 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2015 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2016 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2017 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2018 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2021 /* The "default" register map used in 64bit mode. */
2023 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2025 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2026 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2027 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2028 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2029 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2030 8,9,10,11,12,13,14,15, /* extended integer registers */
2031 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2034 /* Define the register numbers to be used in Dwarf debugging information.
2035 The SVR4 reference port C compiler uses the following register numbers
2036 in its Dwarf output code:
2037 0 for %eax (gcc regno = 0)
2038 1 for %ecx (gcc regno = 2)
2039 2 for %edx (gcc regno = 1)
2040 3 for %ebx (gcc regno = 3)
2041 4 for %esp (gcc regno = 7)
2042 5 for %ebp (gcc regno = 6)
2043 6 for %esi (gcc regno = 4)
2044 7 for %edi (gcc regno = 5)
2045 The following three DWARF register numbers are never generated by
2046 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2047 believes these numbers have these meanings.
2048 8 for %eip (no gcc equivalent)
2049 9 for %eflags (gcc regno = 17)
2050 10 for %trapno (no gcc equivalent)
2051 It is not at all clear how we should number the FP stack registers
2052 for the x86 architecture. If the version of SDB on x86/svr4 were
2053 a bit less brain dead with respect to floating-point then we would
2054 have a precedent to follow with respect to DWARF register numbers
2055 for x86 FP registers, but the SDB on x86/svr4 is so completely
2056 broken with respect to FP registers that it is hardly worth thinking
2057 of it as something to strive for compatibility with.
2058 The version of x86/svr4 SDB I have at the moment does (partially)
2059 seem to believe that DWARF register number 11 is associated with
2060 the x86 register %st(0), but that's about all. Higher DWARF
2061 register numbers don't seem to be associated with anything in
2062 particular, and even for DWARF regno 11, SDB only seems to under-
2063 stand that it should say that a variable lives in %st(0) (when
2064 asked via an `=' command) if we said it was in DWARF regno 11,
2065 but SDB still prints garbage when asked for the value of the
2066 variable in question (via a `/' command).
2067 (Also note that the labels SDB prints for various FP stack regs
2068 when doing an `x' command are all wrong.)
2069 Note that these problems generally don't affect the native SVR4
2070 C compiler because it doesn't allow the use of -O with -g and
2071 because when it is *not* optimizing, it allocates a memory
2072 location for each floating-point variable, and the memory
2073 location is what gets described in the DWARF AT_location
2074 attribute for the variable in question.
2075 Regardless of the severe mental illness of the x86/svr4 SDB, we
2076 do something sensible here and we use the following DWARF
2077 register numbers. Note that these are all stack-top-relative
2078 numbers.
2079 11 for %st(0) (gcc regno = 8)
2080 12 for %st(1) (gcc regno = 9)
2081 13 for %st(2) (gcc regno = 10)
2082 14 for %st(3) (gcc regno = 11)
2083 15 for %st(4) (gcc regno = 12)
2084 16 for %st(5) (gcc regno = 13)
2085 17 for %st(6) (gcc regno = 14)
2086 18 for %st(7) (gcc regno = 15)
2088 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2090 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2091 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2092 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2093 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2094 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2095 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2096 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
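#if 0
/* Illustrative sketch, not part of the original source: the table above is
   indexed by gcc register number and yields the SVR4 DWARF number, so, using
   the numbering documented in the comment above:  */
gcc_assert (svr4_dbx_register_map[1] == 2);   /* gcc regno 1 is %edx -> DWARF 2.  */
gcc_assert (svr4_dbx_register_map[7] == 4);   /* gcc regno 7 is %esp -> DWARF 4.  */
gcc_assert (svr4_dbx_register_map[8] == 11);  /* gcc regno 8 is %st(0) -> DWARF 11.  */
#endif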
2099 /* Define parameter passing and return registers. */
2101 static int const x86_64_int_parameter_registers[6] =
2103 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2106 static int const x86_64_ms_abi_int_parameter_registers[4] =
2108 CX_REG, DX_REG, R8_REG, R9_REG
2111 static int const x86_64_int_return_registers[4] =
2113 AX_REG, DX_REG, DI_REG, SI_REG
2116 /* Define the structure for the machine field in struct function. */
2118 struct GTY(()) stack_local_entry {
2119 unsigned short mode;
2120 unsigned short n;
2121 rtx rtl;
2122 struct stack_local_entry *next;
2125 /* Structure describing stack frame layout.
2126 Stack grows downward:
2128 [arguments]
2129 <- ARG_POINTER
2130 saved pc
2132 saved static chain if ix86_static_chain_on_stack
2134 saved frame pointer if frame_pointer_needed
2135 <- HARD_FRAME_POINTER
2136 [saved regs]
2137 <- regs_save_offset
2138 [padding0]
2140 [saved SSE regs]
2141 <- sse_regs_save_offset
2142 [padding1] |
2143 | <- FRAME_POINTER
2144 [va_arg registers] |
2146 [frame] |
2148 [padding2] | = to_allocate
2149 <- STACK_POINTER
2151 struct ix86_frame
2153 int nsseregs;
2154 int nregs;
2155 int va_arg_size;
2156 int red_zone_size;
2157 int outgoing_arguments_size;
2158 HOST_WIDE_INT frame;
2160 /* The offsets relative to ARG_POINTER. */
2161 HOST_WIDE_INT frame_pointer_offset;
2162 HOST_WIDE_INT hard_frame_pointer_offset;
2163 HOST_WIDE_INT stack_pointer_offset;
2164 HOST_WIDE_INT hfp_save_offset;
2165 HOST_WIDE_INT reg_save_offset;
2166 HOST_WIDE_INT sse_reg_save_offset;
2168 /* When save_regs_using_mov is set, emit prologue using
2169 move instead of push instructions. */
2170 bool save_regs_using_mov;
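#if 0
/* Illustrative sketch, not part of the original source: every offset above is
   a distance from ARG_POINTER in the diagram, growing toward STACK_POINTER,
   so the space a prologue still has to allocate is essentially the difference
   between stack_pointer_offset and the offset already reached by the pushes
   and register saves.  "offset_after_saves" is a hypothetical name for that
   point, not a field of the structure.  */
HOST_WIDE_INT allocate = frame.stack_pointer_offset - offset_after_saves;
#endif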
2173 /* Code model option. */
2174 enum cmodel ix86_cmodel;
2175 /* Asm dialect. */
2176 enum asm_dialect ix86_asm_dialect = ASM_ATT;
2177 /* TLS dialects. */
2178 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
2180 /* Which unit we are generating floating point math for. */
2181 enum fpmath_unit ix86_fpmath;
2183 /* Which cpu we are scheduling for. */
2184 enum attr_cpu ix86_schedule;
2186 /* Which cpu we are optimizing for. */
2187 enum processor_type ix86_tune;
2189 /* Which instruction set architecture to use. */
2190 enum processor_type ix86_arch;
2192 /* True if the SSE prefetch instruction is not a NOOP. */
2193 int x86_prefetch_sse;
2195 /* ix86_regparm_string as a number */
2196 static int ix86_regparm;
2198 /* -mstackrealign option */
2199 static const char ix86_force_align_arg_pointer_string[]
2200 = "force_align_arg_pointer";
2202 static rtx (*ix86_gen_leave) (void);
2203 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2204 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2205 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2206 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2207 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2208 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2209 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2210 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2211 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2213 /* Preferred alignment for stack boundary in bits. */
2214 unsigned int ix86_preferred_stack_boundary;
2216 /* Alignment for incoming stack boundary in bits specified at
2217 command line. */
2218 static unsigned int ix86_user_incoming_stack_boundary;
2220 /* Default alignment for incoming stack boundary in bits. */
2221 static unsigned int ix86_default_incoming_stack_boundary;
2223 /* Alignment for incoming stack boundary in bits. */
2224 unsigned int ix86_incoming_stack_boundary;
2226 /* The abi used by target. */
2227 enum calling_abi ix86_abi;
2229 /* Values 1-5: see jump.c */
2230 int ix86_branch_cost;
2232 /* Calling abi specific va_list type nodes. */
2233 static GTY(()) tree sysv_va_list_type_node;
2234 static GTY(()) tree ms_va_list_type_node;
2236 /* Variables which are this size or smaller are put in the data/bss
2237 or ldata/lbss sections. */
2239 int ix86_section_threshold = 65536;
2241 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2242 char internal_label_prefix[16];
2243 int internal_label_prefix_len;
2245 /* Fence to use after loop using movnt. */
2246 tree x86_mfence;
2248 /* Register class used for passing a given 64bit part of the argument.
2249 These represent classes as documented by the psABI, with the exception of
2250 the SSESF and SSEDF classes, which are basically the SSE class, except that
2251 gcc will use an SF or DFmode move instead of a DImode move to avoid reformatting penalties.
2253 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2254 whenever possible (when the upper half contains only padding). */
2255 enum x86_64_reg_class
2257 X86_64_NO_CLASS,
2258 X86_64_INTEGER_CLASS,
2259 X86_64_INTEGERSI_CLASS,
2260 X86_64_SSE_CLASS,
2261 X86_64_SSESF_CLASS,
2262 X86_64_SSEDF_CLASS,
2263 X86_64_SSEUP_CLASS,
2264 X86_64_X87_CLASS,
2265 X86_64_X87UP_CLASS,
2266 X86_64_COMPLEX_X87_CLASS,
2267 X86_64_MEMORY_CLASS
2270 #define MAX_CLASSES 4
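/* Illustrative example, not part of the original source: under the psABI
   rules referred to above, a 16-byte aggregate such as

     struct s { long l; double d; };

   is classified one eightbyte at a time, giving an integer class for the
   first eightbyte and an SSE class (SSEDF in the refinement above) for the
   second, so it is passed in one general-purpose and one SSE register.
   MAX_CLASSES bounds how many eightbytes, and hence classes, a single
   register-passed argument may need.  */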
2272 /* Table of constants used by fldpi, fldln2, etc.... */
2273 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2274 static bool ext_80387_constants_init = 0;
2277 static struct machine_function * ix86_init_machine_status (void);
2278 static rtx ix86_function_value (const_tree, const_tree, bool);
2279 static bool ix86_function_value_regno_p (const unsigned int);
2280 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2281 const_tree);
2282 static rtx ix86_static_chain (const_tree, bool);
2283 static int ix86_function_regparm (const_tree, const_tree);
2284 static void ix86_compute_frame_layout (struct ix86_frame *);
2285 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2286 rtx, rtx, int);
2287 static void ix86_add_new_builtins (int);
2288 static rtx ix86_expand_vec_perm_builtin (tree);
2289 static tree ix86_canonical_va_list_type (tree);
2290 static void predict_jump (int);
2291 static unsigned int split_stack_prologue_scratch_regno (void);
2292 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2294 enum ix86_function_specific_strings
2296 IX86_FUNCTION_SPECIFIC_ARCH,
2297 IX86_FUNCTION_SPECIFIC_TUNE,
2298 IX86_FUNCTION_SPECIFIC_FPMATH,
2299 IX86_FUNCTION_SPECIFIC_MAX
2302 static char *ix86_target_string (int, int, const char *, const char *,
2303 const char *, bool);
2304 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2305 static void ix86_function_specific_save (struct cl_target_option *);
2306 static void ix86_function_specific_restore (struct cl_target_option *);
2307 static void ix86_function_specific_print (FILE *, int,
2308 struct cl_target_option *);
2309 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2310 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2311 static bool ix86_can_inline_p (tree, tree);
2312 static void ix86_set_current_function (tree);
2313 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2315 static enum calling_abi ix86_function_abi (const_tree);
2318 #ifndef SUBTARGET32_DEFAULT_CPU
2319 #define SUBTARGET32_DEFAULT_CPU "i386"
2320 #endif
2322 /* The svr4 ABI for the i386 says that records and unions are returned
2323 in memory. */
2324 #ifndef DEFAULT_PCC_STRUCT_RETURN
2325 #define DEFAULT_PCC_STRUCT_RETURN 1
2326 #endif
2328 /* Whether -mtune= or -march= were specified */
2329 static int ix86_tune_defaulted;
2330 static int ix86_arch_specified;
2332 /* A mask of ix86_isa_flags that includes bit X if X
2333 was set or cleared on the command line. */
2334 static int ix86_isa_flags_explicit;
2336 /* Define a set of ISAs which are available when a given ISA is
2337 enabled. MMX and SSE ISAs are handled separately. */
2339 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2340 #define OPTION_MASK_ISA_3DNOW_SET \
2341 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2343 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2344 #define OPTION_MASK_ISA_SSE2_SET \
2345 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2346 #define OPTION_MASK_ISA_SSE3_SET \
2347 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2348 #define OPTION_MASK_ISA_SSSE3_SET \
2349 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2350 #define OPTION_MASK_ISA_SSE4_1_SET \
2351 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2352 #define OPTION_MASK_ISA_SSE4_2_SET \
2353 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2354 #define OPTION_MASK_ISA_AVX_SET \
2355 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2356 #define OPTION_MASK_ISA_FMA_SET \
2357 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2359 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2360 as -msse4.2. */
2361 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2363 #define OPTION_MASK_ISA_SSE4A_SET \
2364 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2365 #define OPTION_MASK_ISA_FMA4_SET \
2366 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2367 | OPTION_MASK_ISA_AVX_SET)
2368 #define OPTION_MASK_ISA_XOP_SET \
2369 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2370 #define OPTION_MASK_ISA_LWP_SET \
2371 OPTION_MASK_ISA_LWP
2373 /* AES and PCLMUL need SSE2 because they use xmm registers */
2374 #define OPTION_MASK_ISA_AES_SET \
2375 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2376 #define OPTION_MASK_ISA_PCLMUL_SET \
2377 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2379 #define OPTION_MASK_ISA_ABM_SET \
2380 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2382 #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
2383 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
2384 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2385 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2386 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2387 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2388 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2390 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2391 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2392 #define OPTION_MASK_ISA_F16C_SET \
2393 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
2395 /* Define a set of ISAs which aren't available when a given ISA is
2396 disabled. MMX and SSE ISAs are handled separately. */
2398 #define OPTION_MASK_ISA_MMX_UNSET \
2399 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2400 #define OPTION_MASK_ISA_3DNOW_UNSET \
2401 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2402 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2404 #define OPTION_MASK_ISA_SSE_UNSET \
2405 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2406 #define OPTION_MASK_ISA_SSE2_UNSET \
2407 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2408 #define OPTION_MASK_ISA_SSE3_UNSET \
2409 (OPTION_MASK_ISA_SSE3 \
2410 | OPTION_MASK_ISA_SSSE3_UNSET \
2411 | OPTION_MASK_ISA_SSE4A_UNSET )
2412 #define OPTION_MASK_ISA_SSSE3_UNSET \
2413 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2414 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2415 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2416 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2417 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2418 #define OPTION_MASK_ISA_AVX_UNSET \
2419 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2420 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2421 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2423 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2424 as -mno-sse4.1. */
2425 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2427 #define OPTION_MASK_ISA_SSE4A_UNSET \
2428 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2430 #define OPTION_MASK_ISA_FMA4_UNSET \
2431 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2432 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2433 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2435 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2436 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2437 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2438 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
2439 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
2440 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2441 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2442 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2443 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2444 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2446 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2447 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2448 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
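#if 0
/* Illustrative sketch, not part of the original source: the SET/UNSET pairs
   above are used exactly as ix86_handle_option does below.  Toggling SSE3,
   for instance, looks like this, so enabling SSE3 drags in SSE2 and SSE,
   while disabling it also disables SSSE3, SSE4.1, SSE4.2, SSE4A, AVX, FMA,
   FMA4, XOP and F16C:  */
if (value)
  {
    ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
    ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
  }
else
  {
    ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
    ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
  }
#endif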
2450 /* Vectorization library interface and handlers. */
2451 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2453 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2454 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2456 /* Processor target table, indexed by processor number */
2457 struct ptt
2459 const struct processor_costs *cost; /* Processor costs */
2460 const int align_loop; /* Default alignments. */
2461 const int align_loop_max_skip;
2462 const int align_jump;
2463 const int align_jump_max_skip;
2464 const int align_func;
2467 static const struct ptt processor_target_table[PROCESSOR_max] =
2469 {&i386_cost, 4, 3, 4, 3, 4},
2470 {&i486_cost, 16, 15, 16, 15, 16},
2471 {&pentium_cost, 16, 7, 16, 7, 16},
2472 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2473 {&geode_cost, 0, 0, 0, 0, 0},
2474 {&k6_cost, 32, 7, 32, 7, 32},
2475 {&athlon_cost, 16, 7, 16, 7, 16},
2476 {&pentium4_cost, 0, 0, 0, 0, 0},
2477 {&k8_cost, 16, 7, 16, 7, 16},
2478 {&nocona_cost, 0, 0, 0, 0, 0},
2479 /* Core 2 32-bit. */
2480 {&generic32_cost, 16, 10, 16, 10, 16},
2481 /* Core 2 64-bit. */
2482 {&generic64_cost, 16, 10, 16, 10, 16},
2483 /* Core i7 32-bit. */
2484 {&generic32_cost, 16, 10, 16, 10, 16},
2485 /* Core i7 64-bit. */
2486 {&generic64_cost, 16, 10, 16, 10, 16},
2487 {&generic32_cost, 16, 7, 16, 7, 16},
2488 {&generic64_cost, 16, 10, 16, 10, 16},
2489 {&amdfam10_cost, 32, 24, 32, 7, 32},
2490 {&bdver1_cost, 32, 24, 32, 7, 32},
2491 {&atom_cost, 16, 7, 16, 7, 16}
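#if 0
/* Illustrative sketch, not part of the original source: the rows above are
   indexed by PROCESSOR_* and their alignment fields are used later in
   ix86_option_override_internal as defaults when the user supplied none,
   roughly:  */
if (align_loops == 0)
  {
    align_loops = processor_target_table[ix86_tune].align_loop;
    align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
  }
#endif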
2494 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2496 "generic",
2497 "i386",
2498 "i486",
2499 "pentium",
2500 "pentium-mmx",
2501 "pentiumpro",
2502 "pentium2",
2503 "pentium3",
2504 "pentium4",
2505 "pentium-m",
2506 "prescott",
2507 "nocona",
2508 "core2",
2509 "corei7",
2510 "atom",
2511 "geode",
2512 "k6",
2513 "k6-2",
2514 "k6-3",
2515 "athlon",
2516 "athlon-4",
2517 "k8",
2518 "amdfam10",
2519 "bdver1"
2522 /* Return true if a red-zone is in use. */
2524 static inline bool
2525 ix86_using_red_zone (void)
2527 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2530 /* Implement TARGET_HANDLE_OPTION. */
2532 static bool
2533 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2535 switch (code)
2537 case OPT_mmmx:
2538 if (value)
2540 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2541 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2543 else
2545 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2546 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2548 return true;
2550 case OPT_m3dnow:
2551 if (value)
2553 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2554 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2556 else
2558 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2559 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2561 return true;
2563 case OPT_m3dnowa:
2564 return false;
2566 case OPT_msse:
2567 if (value)
2569 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2570 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2572 else
2574 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2575 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2577 return true;
2579 case OPT_msse2:
2580 if (value)
2582 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2583 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2585 else
2587 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2588 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2590 return true;
2592 case OPT_msse3:
2593 if (value)
2595 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2596 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2598 else
2600 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2601 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2603 return true;
2605 case OPT_mssse3:
2606 if (value)
2608 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2609 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2611 else
2613 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2614 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2616 return true;
2618 case OPT_msse4_1:
2619 if (value)
2621 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2622 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2624 else
2626 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2627 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2629 return true;
2631 case OPT_msse4_2:
2632 if (value)
2634 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2635 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2637 else
2639 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2640 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2642 return true;
2644 case OPT_mavx:
2645 if (value)
2647 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2648 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2650 else
2652 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2653 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2655 return true;
2657 case OPT_mfma:
2658 if (value)
2660 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2661 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2663 else
2665 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2666 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2668 return true;
2670 case OPT_msse4:
2671 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2672 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2673 return true;
2675 case OPT_mno_sse4:
2676 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2677 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2678 return true;
2680 case OPT_msse4a:
2681 if (value)
2683 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2684 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2686 else
2688 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2689 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2691 return true;
2693 case OPT_mfma4:
2694 if (value)
2696 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2697 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2699 else
2701 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2702 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2704 return true;
2706 case OPT_mxop:
2707 if (value)
2709 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2710 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2712 else
2714 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2715 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2717 return true;
2719 case OPT_mlwp:
2720 if (value)
2722 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2723 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2725 else
2727 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2728 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2730 return true;
2732 case OPT_mabm:
2733 if (value)
2735 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2736 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2738 else
2740 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2741 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2743 return true;
2745 case OPT_mbmi:
2746 if (value)
2748 ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
2749 ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
2751 else
2753 ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
2754 ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
2756 return true;
2758 case OPT_mtbm:
2759 if (value)
2761 ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET;
2762 ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET;
2764 else
2766 ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET;
2767 ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET;
2769 return true;
2771 case OPT_mpopcnt:
2772 if (value)
2774 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2775 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2777 else
2779 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2780 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2782 return true;
2784 case OPT_msahf:
2785 if (value)
2787 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2788 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2790 else
2792 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2793 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2795 return true;
2797 case OPT_mcx16:
2798 if (value)
2800 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2801 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2803 else
2805 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2806 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2808 return true;
2810 case OPT_mmovbe:
2811 if (value)
2813 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2814 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2816 else
2818 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2819 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2821 return true;
2823 case OPT_mcrc32:
2824 if (value)
2826 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2827 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2829 else
2831 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2832 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2834 return true;
2836 case OPT_maes:
2837 if (value)
2839 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2840 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2842 else
2844 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2845 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2847 return true;
2849 case OPT_mpclmul:
2850 if (value)
2852 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2853 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2855 else
2857 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2858 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2860 return true;
2862 case OPT_mfsgsbase:
2863 if (value)
2865 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2866 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2868 else
2870 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2871 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2873 return true;
2875 case OPT_mrdrnd:
2876 if (value)
2878 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2879 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2881 else
2883 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2884 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2886 return true;
2888 case OPT_mf16c:
2889 if (value)
2891 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2892 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2894 else
2896 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2897 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
2899 return true;
2901 default:
2902 return true;
2906 /* Return a string that documents the current -m options. The caller is
2907 responsible for freeing the string. */
2909 static char *
2910 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2911 const char *fpmath, bool add_nl_p)
2913 struct ix86_target_opts
2915 const char *option; /* option string */
2916 int mask; /* isa mask options */
2919 /* This table is ordered so that options like -msse4.2 that imply
2920 preceding options will match those first. */
2921 static struct ix86_target_opts isa_opts[] =
2923 { "-m64", OPTION_MASK_ISA_64BIT },
2924 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2925 { "-mfma", OPTION_MASK_ISA_FMA },
2926 { "-mxop", OPTION_MASK_ISA_XOP },
2927 { "-mlwp", OPTION_MASK_ISA_LWP },
2928 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2929 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2930 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2931 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2932 { "-msse3", OPTION_MASK_ISA_SSE3 },
2933 { "-msse2", OPTION_MASK_ISA_SSE2 },
2934 { "-msse", OPTION_MASK_ISA_SSE },
2935 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2936 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2937 { "-mmmx", OPTION_MASK_ISA_MMX },
2938 { "-mabm", OPTION_MASK_ISA_ABM },
2939 { "-mbmi", OPTION_MASK_ISA_BMI },
2940 { "-mtbm", OPTION_MASK_ISA_TBM },
2941 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2942 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2943 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2944 { "-maes", OPTION_MASK_ISA_AES },
2945 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2946 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2947 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2948 { "-mf16c", OPTION_MASK_ISA_F16C },
2951 /* Flag options. */
2952 static struct ix86_target_opts flag_opts[] =
2954 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2955 { "-m80387", MASK_80387 },
2956 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2957 { "-malign-double", MASK_ALIGN_DOUBLE },
2958 { "-mcld", MASK_CLD },
2959 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2960 { "-mieee-fp", MASK_IEEE_FP },
2961 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2962 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2963 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2964 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2965 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2966 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2967 { "-mno-red-zone", MASK_NO_RED_ZONE },
2968 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2969 { "-mrecip", MASK_RECIP },
2970 { "-mrtd", MASK_RTD },
2971 { "-msseregparm", MASK_SSEREGPARM },
2972 { "-mstack-arg-probe", MASK_STACK_PROBE },
2973 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2974 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2975 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2976 { "-mvzeroupper", MASK_VZEROUPPER },
2979 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2981 char isa_other[40];
2982 char target_other[40];
2983 unsigned num = 0;
2984 unsigned i, j;
2985 char *ret;
2986 char *ptr;
2987 size_t len;
2988 size_t line_len;
2989 size_t sep_len;
2991 memset (opts, '\0', sizeof (opts));
2993 /* Add -march= option. */
2994 if (arch)
2996 opts[num][0] = "-march=";
2997 opts[num++][1] = arch;
3000 /* Add -mtune= option. */
3001 if (tune)
3003 opts[num][0] = "-mtune=";
3004 opts[num++][1] = tune;
3007 /* Pick out the options in isa options. */
3008 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3010 if ((isa & isa_opts[i].mask) != 0)
3012 opts[num++][0] = isa_opts[i].option;
3013 isa &= ~ isa_opts[i].mask;
3017 if (isa && add_nl_p)
3019 opts[num++][0] = isa_other;
3020 sprintf (isa_other, "(other isa: %#x)", isa);
3023 /* Add flag options. */
3024 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3026 if ((flags & flag_opts[i].mask) != 0)
3028 opts[num++][0] = flag_opts[i].option;
3029 flags &= ~ flag_opts[i].mask;
3033 if (flags && add_nl_p)
3035 opts[num++][0] = target_other;
3036 sprintf (target_other, "(other flags: %#x)", flags);
3039 /* Add -fpmath= option. */
3040 if (fpmath)
3042 opts[num][0] = "-mfpmath=";
3043 opts[num++][1] = fpmath;
3046 /* Any options? */
3047 if (num == 0)
3048 return NULL;
3050 gcc_assert (num < ARRAY_SIZE (opts));
3052 /* Size the string. */
3053 len = 0;
3054 sep_len = (add_nl_p) ? 3 : 1;
3055 for (i = 0; i < num; i++)
3057 len += sep_len;
3058 for (j = 0; j < 2; j++)
3059 if (opts[i][j])
3060 len += strlen (opts[i][j]);
3063 /* Build the string. */
3064 ret = ptr = (char *) xmalloc (len);
3065 line_len = 0;
3067 for (i = 0; i < num; i++)
3069 size_t len2[2];
3071 for (j = 0; j < 2; j++)
3072 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3074 if (i != 0)
3076 *ptr++ = ' ';
3077 line_len++;
3079 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3081 *ptr++ = '\\';
3082 *ptr++ = '\n';
3083 line_len = 0;
3087 for (j = 0; j < 2; j++)
3088 if (opts[i][j])
3090 memcpy (ptr, opts[i][j], len2[j]);
3091 ptr += len2[j];
3092 line_len += len2[j];
3096 *ptr = '\0';
3097 gcc_assert (ret + len >= ptr);
3099 return ret;
3102 /* Return TRUE if software prefetching is beneficial for the
3103 given CPU. */
3105 static bool
3106 software_prefetching_beneficial_p (void)
3108 switch (ix86_tune)
3110 case PROCESSOR_GEODE:
3111 case PROCESSOR_K6:
3112 case PROCESSOR_ATHLON:
3113 case PROCESSOR_K8:
3114 case PROCESSOR_AMDFAM10:
3115 return true;
3117 default:
3118 return false;
3122 /* Return true if profiling code should be emitted before the
3123 prologue, and false otherwise.
3124 Note: on x86 this is the case when -mfentry ("hotfix"-style patching) is used. */
3125 static bool
3126 ix86_profile_before_prologue (void)
3128 return flag_fentry != 0;
3131 /* Function that is callable from the debugger to print the current
3132 options. */
3133 void
3134 ix86_debug_options (void)
3136 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3137 ix86_arch_string, ix86_tune_string,
3138 ix86_fpmath_string, true);
3140 if (opts)
3142 fprintf (stderr, "%s\n\n", opts);
3143 free (opts);
3145 else
3146 fputs ("<no options>\n\n", stderr);
3148 return;
3151 /* Override various settings based on options. If MAIN_ARGS_P, the
3152 options are from the command line, otherwise they are from
3153 attributes. */
3155 static void
3156 ix86_option_override_internal (bool main_args_p)
3158 int i;
3159 unsigned int ix86_arch_mask, ix86_tune_mask;
3160 const bool ix86_tune_specified = (ix86_tune_string != NULL);
3161 const char *prefix;
3162 const char *suffix;
3163 const char *sw;
3165 /* Comes from final.c -- no real reason to change it. */
3166 #define MAX_CODE_ALIGN 16
3168 enum pta_flags
3170 PTA_SSE = 1 << 0,
3171 PTA_SSE2 = 1 << 1,
3172 PTA_SSE3 = 1 << 2,
3173 PTA_MMX = 1 << 3,
3174 PTA_PREFETCH_SSE = 1 << 4,
3175 PTA_3DNOW = 1 << 5,
3176 PTA_3DNOW_A = 1 << 6,
3177 PTA_64BIT = 1 << 7,
3178 PTA_SSSE3 = 1 << 8,
3179 PTA_CX16 = 1 << 9,
3180 PTA_POPCNT = 1 << 10,
3181 PTA_ABM = 1 << 11,
3182 PTA_SSE4A = 1 << 12,
3183 PTA_NO_SAHF = 1 << 13,
3184 PTA_SSE4_1 = 1 << 14,
3185 PTA_SSE4_2 = 1 << 15,
3186 PTA_AES = 1 << 16,
3187 PTA_PCLMUL = 1 << 17,
3188 PTA_AVX = 1 << 18,
3189 PTA_FMA = 1 << 19,
3190 PTA_MOVBE = 1 << 20,
3191 PTA_FMA4 = 1 << 21,
3192 PTA_XOP = 1 << 22,
3193 PTA_LWP = 1 << 23,
3194 PTA_FSGSBASE = 1 << 24,
3195 PTA_RDRND = 1 << 25,
3196 PTA_F16C = 1 << 26,
3197 PTA_BMI = 1 << 27,
3198 PTA_TBM = 1 << 28
3199 /* if this reaches 32, need to widen struct pta flags below */
3202 static struct pta
3204 const char *const name; /* processor name or nickname. */
3205 const enum processor_type processor;
3206 const enum attr_cpu schedule;
3207 const unsigned /*enum pta_flags*/ flags;
3209 const processor_alias_table[] =
3211 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3212 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3213 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3214 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3215 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3216 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3217 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3218 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3219 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
3220 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3221 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3222 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
3223 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3224 PTA_MMX | PTA_SSE},
3225 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3226 PTA_MMX | PTA_SSE},
3227 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3228 PTA_MMX | PTA_SSE | PTA_SSE2},
3229 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3230 PTA_MMX |PTA_SSE | PTA_SSE2},
3231 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3232 PTA_MMX | PTA_SSE | PTA_SSE2},
3233 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3234 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
3235 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3236 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3237 | PTA_CX16 | PTA_NO_SAHF},
3238 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
3239 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3240 | PTA_SSSE3 | PTA_CX16},
3241 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
3242 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3243 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
3244 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
3245 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3246 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
3247 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
3248 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3249 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3250 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3251 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3252 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
3253 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3254 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3255 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3256 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3257 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3258 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3259 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3260 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3261 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3262 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3263 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3264 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3265 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3266 {"x86-64", PROCESSOR_K8, CPU_K8,
3267 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3268 {"k8", PROCESSOR_K8, CPU_K8,
3269 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3270 | PTA_SSE2 | PTA_NO_SAHF},
3271 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3272 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3273 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3274 {"opteron", PROCESSOR_K8, CPU_K8,
3275 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3276 | PTA_SSE2 | PTA_NO_SAHF},
3277 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3278 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3279 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3280 {"athlon64", PROCESSOR_K8, CPU_K8,
3281 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3282 | PTA_SSE2 | PTA_NO_SAHF},
3283 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3284 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3285 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3286 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3287 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3288 | PTA_SSE2 | PTA_NO_SAHF},
3289 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3290 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3291 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3292 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3293 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3294 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3295 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3296 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3297 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
3298 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
3299 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
3300 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3301 0 /* flags are only used for -march switch. */ },
3302 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3303 PTA_64BIT /* flags are only used for -march switch. */ },
3306 int const pta_size = ARRAY_SIZE (processor_alias_table);
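#if 0
  /* Illustrative sketch, not part of the original source: further down in
     this function each PTA_* bit of the matched processor_alias_table entry
     is translated into the corresponding ISA flag, unless the user set that
     ISA explicitly on the command line.  The pattern, shown here for SSE3,
     is:  */
  if (processor_alias_table[i].flags & PTA_SSE3
      && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
    ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
#endif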
3308 /* Set up prefix/suffix so the error messages refer to either the command
3309 line argument, or the attribute(target). */
3310 if (main_args_p)
3312 prefix = "-m";
3313 suffix = "";
3314 sw = "switch";
3316 else
3318 prefix = "option(\"";
3319 suffix = "\")";
3320 sw = "attribute";
3323 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3324 SUBTARGET_OVERRIDE_OPTIONS;
3325 #endif
3327 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3328 SUBSUBTARGET_OVERRIDE_OPTIONS;
3329 #endif
3331 /* -fPIC is the default for x86_64. */
3332 if (TARGET_MACHO && TARGET_64BIT)
3333 flag_pic = 2;
3335 /* Need to check -mtune=generic first. */
3336 if (ix86_tune_string)
3338 if (!strcmp (ix86_tune_string, "generic")
3339 || !strcmp (ix86_tune_string, "i686")
3340 /* As special support for cross compilers we read -mtune=native
3341 as -mtune=generic. With native compilers we won't see the
3342 -mtune=native, as it was changed by the driver. */
3343 || !strcmp (ix86_tune_string, "native"))
3345 if (TARGET_64BIT)
3346 ix86_tune_string = "generic64";
3347 else
3348 ix86_tune_string = "generic32";
3350 /* If this call is for setting the option attribute, allow the
3351 generic32/generic64 that was previously set. */
3352 else if (!main_args_p
3353 && (!strcmp (ix86_tune_string, "generic32")
3354 || !strcmp (ix86_tune_string, "generic64")))
3356 else if (!strncmp (ix86_tune_string, "generic", 7))
3357 error ("bad value (%s) for %stune=%s %s",
3358 ix86_tune_string, prefix, suffix, sw);
3359 else if (!strcmp (ix86_tune_string, "x86-64"))
3360 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3361 "%stune=k8%s or %stune=generic%s instead as appropriate",
3362 prefix, suffix, prefix, suffix, prefix, suffix);
3364 else
3366 if (ix86_arch_string)
3367 ix86_tune_string = ix86_arch_string;
3368 if (!ix86_tune_string)
3370 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3371 ix86_tune_defaulted = 1;
3374 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3375 need to use a sensible tune option. */
3376 if (!strcmp (ix86_tune_string, "generic")
3377 || !strcmp (ix86_tune_string, "x86-64")
3378 || !strcmp (ix86_tune_string, "i686"))
3380 if (TARGET_64BIT)
3381 ix86_tune_string = "generic64";
3382 else
3383 ix86_tune_string = "generic32";
3387 if (ix86_stringop_string)
3389 if (!strcmp (ix86_stringop_string, "rep_byte"))
3390 stringop_alg = rep_prefix_1_byte;
3391 else if (!strcmp (ix86_stringop_string, "libcall"))
3392 stringop_alg = libcall;
3393 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3394 stringop_alg = rep_prefix_4_byte;
3395 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3396 && TARGET_64BIT)
3397 /* rep; movq isn't available in 32-bit code. */
3398 stringop_alg = rep_prefix_8_byte;
3399 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3400 stringop_alg = loop_1_byte;
3401 else if (!strcmp (ix86_stringop_string, "loop"))
3402 stringop_alg = loop;
3403 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3404 stringop_alg = unrolled_loop;
3405 else
3406 error ("bad value (%s) for %sstringop-strategy=%s %s",
3407 ix86_stringop_string, prefix, suffix, sw);
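     /* As a worked illustration of the mapping above: rep_byte, rep_4byte and
        rep_8byte select the corresponding rep-prefixed algorithms, byte_loop,
        loop and unrolled_loop select the loop variants, and libcall forces a
        library call.  Note that the rep_8byte test is guarded by TARGET_64BIT,
        so -mstringop-strategy=rep_8byte in 32-bit mode falls through to the
        "bad value" error instead of being silently downgraded.  */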
3410 if (!ix86_arch_string)
3411 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3412 else
3413 ix86_arch_specified = 1;
3415 /* Validate -mabi= value. */
3416 if (ix86_abi_string)
3418 if (strcmp (ix86_abi_string, "sysv") == 0)
3419 ix86_abi = SYSV_ABI;
3420 else if (strcmp (ix86_abi_string, "ms") == 0)
3421 ix86_abi = MS_ABI;
3422 else
3423 error ("unknown ABI (%s) for %sabi=%s %s",
3424 ix86_abi_string, prefix, suffix, sw);
3426 else
3427 ix86_abi = DEFAULT_ABI;
3429 if (ix86_cmodel_string != 0)
3431 if (!strcmp (ix86_cmodel_string, "small"))
3432 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3433 else if (!strcmp (ix86_cmodel_string, "medium"))
3434 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3435 else if (!strcmp (ix86_cmodel_string, "large"))
3436 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3437 else if (flag_pic)
3438 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3439 else if (!strcmp (ix86_cmodel_string, "32"))
3440 ix86_cmodel = CM_32;
3441 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3442 ix86_cmodel = CM_KERNEL;
3443 else
3444 error ("bad value (%s) for %scmodel=%s %s",
3445 ix86_cmodel_string, prefix, suffix, sw);
3447 else
3449 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3450 use of rip-relative addressing. This eliminates fixups that
3451 would otherwise be needed if this object is to be placed in a
3452 DLL, and is essentially just as efficient as direct addressing. */
3453 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3454 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3455 else if (TARGET_64BIT)
3456 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3457 else
3458 ix86_cmodel = CM_32;
3460 if (ix86_asm_string != 0)
3462 if (! TARGET_MACHO
3463 && !strcmp (ix86_asm_string, "intel"))
3464 ix86_asm_dialect = ASM_INTEL;
3465 else if (!strcmp (ix86_asm_string, "att"))
3466 ix86_asm_dialect = ASM_ATT;
3467 else
3468 error ("bad value (%s) for %sasm=%s %s",
3469 ix86_asm_string, prefix, suffix, sw);
3471 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3472 error ("code model %qs not supported in the %s bit mode",
3473 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3474 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3475 sorry ("%i-bit mode not compiled in",
3476 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3478 for (i = 0; i < pta_size; i++)
3479 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3481 ix86_schedule = processor_alias_table[i].schedule;
3482 ix86_arch = processor_alias_table[i].processor;
3483 /* Default cpu tuning to the architecture. */
3484 ix86_tune = ix86_arch;
3486 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3487 error ("CPU you selected does not support x86-64 "
3488 "instruction set");
3490 if (processor_alias_table[i].flags & PTA_MMX
3491 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3492 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3493 if (processor_alias_table[i].flags & PTA_3DNOW
3494 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3495 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3496 if (processor_alias_table[i].flags & PTA_3DNOW_A
3497 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3498 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3499 if (processor_alias_table[i].flags & PTA_SSE
3500 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3501 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3502 if (processor_alias_table[i].flags & PTA_SSE2
3503 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3504 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3505 if (processor_alias_table[i].flags & PTA_SSE3
3506 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3507 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3508 if (processor_alias_table[i].flags & PTA_SSSE3
3509 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3510 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3511 if (processor_alias_table[i].flags & PTA_SSE4_1
3512 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3513 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3514 if (processor_alias_table[i].flags & PTA_SSE4_2
3515 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3516 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3517 if (processor_alias_table[i].flags & PTA_AVX
3518 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3519 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3520 if (processor_alias_table[i].flags & PTA_FMA
3521 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3522 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3523 if (processor_alias_table[i].flags & PTA_SSE4A
3524 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3525 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3526 if (processor_alias_table[i].flags & PTA_FMA4
3527 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3528 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3529 if (processor_alias_table[i].flags & PTA_XOP
3530 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3531 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3532 if (processor_alias_table[i].flags & PTA_LWP
3533 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3534 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3535 if (processor_alias_table[i].flags & PTA_ABM
3536 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3537 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3538 if (processor_alias_table[i].flags & PTA_BMI
3539 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3540 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3541 if (processor_alias_table[i].flags & PTA_TBM
3542 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3543 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3544 if (processor_alias_table[i].flags & PTA_CX16
3545 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3546 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3547 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3548 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3549 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3550 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3551 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3552 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3553 if (processor_alias_table[i].flags & PTA_MOVBE
3554 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3555 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3556 if (processor_alias_table[i].flags & PTA_AES
3557 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3558 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3559 if (processor_alias_table[i].flags & PTA_PCLMUL
3560 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3561 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3562 if (processor_alias_table[i].flags & PTA_FSGSBASE
3563 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3564 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3565 if (processor_alias_table[i].flags & PTA_RDRND
3566 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3567 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3568 if (processor_alias_table[i].flags & PTA_F16C
3569 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3570 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3571 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3572 x86_prefetch_sse = true;
3574 break;
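     /* Illustration of the pattern above: -march=corei7 ORs in MMX, the SSE
        family up through SSE4.2, and CX16, but each bit is skipped when it
        already appears in ix86_isa_flags_explicit, so an explicit user option
        such as -mno-sse4.2 on the same command line keeps SSE4.2 disabled.  */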
3577 if (!strcmp (ix86_arch_string, "generic"))
3578 error ("generic CPU can be used only for %stune=%s %s",
3579 prefix, suffix, sw);
3580 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3581 error ("bad value (%s) for %sarch=%s %s",
3582 ix86_arch_string, prefix, suffix, sw);
3584 ix86_arch_mask = 1u << ix86_arch;
3585 for (i = 0; i < X86_ARCH_LAST; ++i)
3586 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3588 for (i = 0; i < pta_size; i++)
3589 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3591 ix86_schedule = processor_alias_table[i].schedule;
3592 ix86_tune = processor_alias_table[i].processor;
3593 if (TARGET_64BIT)
3595 if (!(processor_alias_table[i].flags & PTA_64BIT))
3597 if (ix86_tune_defaulted)
3599 ix86_tune_string = "x86-64";
3600 for (i = 0; i < pta_size; i++)
3601 if (! strcmp (ix86_tune_string,
3602 processor_alias_table[i].name))
3603 break;
3604 ix86_schedule = processor_alias_table[i].schedule;
3605 ix86_tune = processor_alias_table[i].processor;
3607 else
3608 error ("CPU you selected does not support x86-64 "
3609 "instruction set");
3612 else
3614 /* Adjust tuning when compiling for 32-bit ABI. */
3615 switch (ix86_tune)
3617 case PROCESSOR_GENERIC64:
3618 ix86_tune = PROCESSOR_GENERIC32;
3619 ix86_schedule = CPU_PENTIUMPRO;
3620 break;
3622 case PROCESSOR_CORE2_64:
3623 ix86_tune = PROCESSOR_CORE2_32;
3624 break;
3626 case PROCESSOR_COREI7_64:
3627 ix86_tune = PROCESSOR_COREI7_32;
3628 break;
3630 default:
3631 break;
3634 /* Intel CPUs have always interpreted SSE prefetch instructions as
3635 NOPs; so, we can enable SSE prefetch instructions even when
3636 -mtune (rather than -march) points us to a processor that has them.
3637 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3638 higher processors. */
3639 if (TARGET_CMOVE
3640 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3641 x86_prefetch_sse = true;
3642 break;
3645 if (ix86_tune_specified && i == pta_size)
3646 error ("bad value (%s) for %stune=%s %s",
3647 ix86_tune_string, prefix, suffix, sw);
3649 ix86_tune_mask = 1u << ix86_tune;
3650 for (i = 0; i < X86_TUNE_LAST; ++i)
3651 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3653 #ifndef USE_IX86_FRAME_POINTER
3654 #define USE_IX86_FRAME_POINTER 0
3655 #endif
3657 #ifndef USE_X86_64_FRAME_POINTER
3658 #define USE_X86_64_FRAME_POINTER 0
3659 #endif
3661 /* Set the default values for switches whose default depends on TARGET_64BIT
3662 in case they weren't overwritten by command line options. */
3663 if (TARGET_64BIT)
3665 if (optimize > 1 && !global_options_set.x_flag_zee)
3666 flag_zee = 1;
3667 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3668 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3669 if (flag_asynchronous_unwind_tables == 2)
3670 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3671 if (flag_pcc_struct_return == 2)
3672 flag_pcc_struct_return = 0;
3674 else
3676 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3677 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3678 if (flag_asynchronous_unwind_tables == 2)
3679 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3680 if (flag_pcc_struct_return == 2)
3681 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3684 if (optimize_size)
3685 ix86_cost = &ix86_size_cost;
3686 else
3687 ix86_cost = processor_target_table[ix86_tune].cost;
3689 /* Arrange to set up i386_stack_locals for all functions. */
3690 init_machine_status = ix86_init_machine_status;
3692 /* Validate -mregparm= value. */
3693 if (ix86_regparm_string)
3695 if (TARGET_64BIT)
3696 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3697 i = atoi (ix86_regparm_string);
3698 if (i < 0 || i > REGPARM_MAX)
3699 error ("%sregparm=%d%s is not between 0 and %d",
3700 prefix, i, suffix, REGPARM_MAX);
3701 else
3702 ix86_regparm = i;
3704 if (TARGET_64BIT)
3705 ix86_regparm = REGPARM_MAX;
3707 /* If the user has provided any of the -malign-* options,
3708 warn and use that value only if -falign-* is not set.
3709 Remove this code in GCC 3.2 or later. */
3710 if (ix86_align_loops_string)
3712 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3713 prefix, suffix, suffix);
3714 if (align_loops == 0)
3716 i = atoi (ix86_align_loops_string);
3717 if (i < 0 || i > MAX_CODE_ALIGN)
3718 error ("%salign-loops=%d%s is not between 0 and %d",
3719 prefix, i, suffix, MAX_CODE_ALIGN);
3720 else
3721 align_loops = 1 << i;
3725 if (ix86_align_jumps_string)
3727 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3728 prefix, suffix, suffix);
3729 if (align_jumps == 0)
3731 i = atoi (ix86_align_jumps_string);
3732 if (i < 0 || i > MAX_CODE_ALIGN)
3733 error ("%salign-jumps=%d%s is not between 0 and %d",
3734 prefix, i, suffix, MAX_CODE_ALIGN);
3735 else
3736 align_jumps = 1 << i;
3740 if (ix86_align_funcs_string)
3742 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3743 prefix, suffix, suffix);
3744 if (align_functions == 0)
3746 i = atoi (ix86_align_funcs_string);
3747 if (i < 0 || i > MAX_CODE_ALIGN)
3748 error ("%salign-functions=%d%s is not between 0 and %d",
3749 prefix, i, suffix, MAX_CODE_ALIGN);
3750 else
3751 align_functions = 1 << i;
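     /* Worked example: the -malign-* value is a log2 exponent, not a byte
        count, so -malign-functions=4 yields align_functions = 1 << 4 = 16
        bytes, equivalent to the preferred -falign-functions=16 spelling.  */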
3755 /* Default align_* from the processor table. */
3756 if (align_loops == 0)
3758 align_loops = processor_target_table[ix86_tune].align_loop;
3759 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3761 if (align_jumps == 0)
3763 align_jumps = processor_target_table[ix86_tune].align_jump;
3764 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3766 if (align_functions == 0)
3768 align_functions = processor_target_table[ix86_tune].align_func;
3771 /* Validate -mbranch-cost= value, or provide default. */
3772 ix86_branch_cost = ix86_cost->branch_cost;
3773 if (ix86_branch_cost_string)
3775 i = atoi (ix86_branch_cost_string);
3776 if (i < 0 || i > 5)
3777 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3778 else
3779 ix86_branch_cost = i;
3781 if (ix86_section_threshold_string)
3783 i = atoi (ix86_section_threshold_string);
3784 if (i < 0)
3785 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3786 else
3787 ix86_section_threshold = i;
3790 if (ix86_tls_dialect_string)
3792 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3793 ix86_tls_dialect = TLS_DIALECT_GNU;
3794 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3795 ix86_tls_dialect = TLS_DIALECT_GNU2;
3796 else
3797 error ("bad value (%s) for %stls-dialect=%s %s",
3798 ix86_tls_dialect_string, prefix, suffix, sw);
3801 if (ix87_precision_string)
3803 i = atoi (ix87_precision_string);
3804 if (i != 32 && i != 64 && i != 80)
3805 error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
3808 if (TARGET_64BIT)
3810 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3812 /* Enable by default the SSE and MMX builtins. Do allow the user to
3813 explicitly disable any of these. In particular, disabling SSE and
3814 MMX for kernel code is extremely useful. */
3815 if (!ix86_arch_specified)
3816 ix86_isa_flags
3817 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3818 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3820 if (TARGET_RTD)
3821 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3823 else
3825 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3827 if (!ix86_arch_specified)
3828 ix86_isa_flags
3829 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3831 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3832 when the programmer takes care to keep the stack from being destroyed. */
3833 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3834 target_flags |= MASK_NO_RED_ZONE;
3837 /* Keep nonleaf frame pointers. */
3838 if (flag_omit_frame_pointer)
3839 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3840 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3841 flag_omit_frame_pointer = 1;
3843 /* If we're doing fast math, we don't care about comparison order
3844 wrt NaNs. This lets us use a shorter comparison sequence. */
3845 if (flag_finite_math_only)
3846 target_flags &= ~MASK_IEEE_FP;
3848 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3849 since the insns won't need emulation. */
3850 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3851 target_flags &= ~MASK_NO_FANCY_MATH_387;
3853 /* Likewise, if the target doesn't have a 387, or we've specified
3854 software floating point, don't use 387 inline intrinsics. */
3855 if (!TARGET_80387)
3856 target_flags |= MASK_NO_FANCY_MATH_387;
3858 /* Turn on MMX builtins for -msse. */
3859 if (TARGET_SSE)
3861 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3862 x86_prefetch_sse = true;
3865 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3866 if (TARGET_SSE4_2 || TARGET_ABM)
3867 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3869 /* Validate -mpreferred-stack-boundary= value or default it to
3870 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3871 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3872 if (ix86_preferred_stack_boundary_string)
3874 int min = (TARGET_64BIT ? 4 : 2);
3875 int max = (TARGET_SEH ? 4 : 12);
3877 i = atoi (ix86_preferred_stack_boundary_string);
3878 if (i < min || i > max)
3880 if (min == max)
3881 error ("%spreferred-stack-boundary%s is not supported "
3882 "for this target", prefix, suffix);
3883 else
3884 error ("%spreferred-stack-boundary=%d%s is not between %d and %d",
3885 prefix, i, suffix, min, max);
3887 else
3888 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
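     /* Worked example: the value is again a log2 exponent, so
        -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT
        = 16 * 8 = 128 bits, i.e. a 16-byte boundary, which is also the
        smallest value accepted in 64-bit mode (min == 4).  */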
3891 /* Set the default value for -mstackrealign. */
3892 if (ix86_force_align_arg_pointer == -1)
3893 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3895 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3897 /* Validate -mincoming-stack-boundary= value or default it to
3898 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3899 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3900 if (ix86_incoming_stack_boundary_string)
3902 i = atoi (ix86_incoming_stack_boundary_string);
3903 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3904 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3905 i, TARGET_64BIT ? 4 : 2);
3906 else
3908 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3909 ix86_incoming_stack_boundary
3910 = ix86_user_incoming_stack_boundary;
3914 /* Accept -msseregparm only if at least SSE support is enabled. */
3915 if (TARGET_SSEREGPARM
3916 && ! TARGET_SSE)
3917 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3919 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3920 if (ix86_fpmath_string != 0)
3922 if (! strcmp (ix86_fpmath_string, "387"))
3923 ix86_fpmath = FPMATH_387;
3924 else if (! strcmp (ix86_fpmath_string, "sse"))
3926 if (!TARGET_SSE)
3928 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3929 ix86_fpmath = FPMATH_387;
3931 else
3932 ix86_fpmath = FPMATH_SSE;
3934 else if (! strcmp (ix86_fpmath_string, "387,sse")
3935 || ! strcmp (ix86_fpmath_string, "387+sse")
3936 || ! strcmp (ix86_fpmath_string, "sse,387")
3937 || ! strcmp (ix86_fpmath_string, "sse+387")
3938 || ! strcmp (ix86_fpmath_string, "both"))
3940 if (!TARGET_SSE)
3942 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3943 ix86_fpmath = FPMATH_387;
3945 else if (!TARGET_80387)
3947 warning (0, "387 instruction set disabled, using SSE arithmetics");
3948 ix86_fpmath = FPMATH_SSE;
3950 else
3951 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3953 else
3954 error ("bad value (%s) for %sfpmath=%s %s",
3955 ix86_fpmath_string, prefix, suffix, sw);
3958 /* If the i387 is disabled, then do not return values in it. */
3959 if (!TARGET_80387)
3960 target_flags &= ~MASK_FLOAT_RETURNS;
3962 /* Use an external vectorized library when vectorizing intrinsics. */
3963 if (ix86_veclibabi_string)
3965 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3966 ix86_veclib_handler = ix86_veclibabi_svml;
3967 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3968 ix86_veclib_handler = ix86_veclibabi_acml;
3969 else
3970 error ("unknown vectorization library ABI type (%s) for "
3971 "%sveclibabi=%s %s", ix86_veclibabi_string,
3972 prefix, suffix, sw);
3975 if ((!USE_IX86_FRAME_POINTER
3976 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3977 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3978 && !optimize_size)
3979 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3981 /* ??? Unwind info is not correct around the CFG unless either a frame
3982 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3983 unwind info generation to be aware of the CFG and propagating states
3984 around edges. */
3985 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3986 || flag_exceptions || flag_non_call_exceptions)
3987 && flag_omit_frame_pointer
3988 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3990 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3991 warning (0, "unwind tables currently require either a frame pointer "
3992 "or %saccumulate-outgoing-args%s for correctness",
3993 prefix, suffix);
3994 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3997 /* If stack probes are required, the space used for large function
3998 arguments on the stack must also be probed, so enable
3999 -maccumulate-outgoing-args so this happens in the prologue. */
4000 if (TARGET_STACK_PROBE
4001 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4003 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4004 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4005 "for correctness", prefix, suffix);
4006 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4009 /* For sane SSE instruction set generation we need the fcomi instruction.
4010 It is safe to enable all CMOVE instructions. */
4011 if (TARGET_SSE)
4012 TARGET_CMOVE = 1;
4014 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4016 char *p;
4017 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4018 p = strchr (internal_label_prefix, 'X');
4019 internal_label_prefix_len = p - internal_label_prefix;
4020 *p = '\0';
4023 /* When the scheduling description is not available, disable the scheduler
4024 pass so it won't slow down compilation and make x87 code slower. */
4025 if (!TARGET_SCHEDULE)
4026 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
4028 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4029 ix86_cost->simultaneous_prefetches,
4030 global_options.x_param_values,
4031 global_options_set.x_param_values);
4032 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
4033 global_options.x_param_values,
4034 global_options_set.x_param_values);
4035 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
4036 global_options.x_param_values,
4037 global_options_set.x_param_values);
4038 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
4039 global_options.x_param_values,
4040 global_options_set.x_param_values);
4042 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4043 if (flag_prefetch_loop_arrays < 0
4044 && HAVE_prefetch
4045 && optimize >= 3
4046 && software_prefetching_beneficial_p ())
4047 flag_prefetch_loop_arrays = 1;
4049 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4050 can be optimized to ap = __builtin_next_arg (0). */
4051 if (!TARGET_64BIT && !flag_split_stack)
4052 targetm.expand_builtin_va_start = NULL;
4054 if (TARGET_64BIT)
4056 ix86_gen_leave = gen_leave_rex64;
4057 ix86_gen_add3 = gen_adddi3;
4058 ix86_gen_sub3 = gen_subdi3;
4059 ix86_gen_sub3_carry = gen_subdi3_carry;
4060 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4061 ix86_gen_monitor = gen_sse3_monitor64;
4062 ix86_gen_andsp = gen_anddi3;
4063 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4064 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4065 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4067 else
4069 ix86_gen_leave = gen_leave;
4070 ix86_gen_add3 = gen_addsi3;
4071 ix86_gen_sub3 = gen_subsi3;
4072 ix86_gen_sub3_carry = gen_subsi3_carry;
4073 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4074 ix86_gen_monitor = gen_sse3_monitor;
4075 ix86_gen_andsp = gen_andsi3;
4076 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4077 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4078 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4081 #ifdef USE_IX86_CLD
4082 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4083 if (!TARGET_64BIT)
4084 target_flags |= MASK_CLD & ~target_flags_explicit;
4085 #endif
4087 if (!TARGET_64BIT && flag_pic)
4089 if (flag_fentry > 0)
4090 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4091 "with -fpic");
4092 flag_fentry = 0;
4094 else if (TARGET_SEH)
4096 if (flag_fentry == 0)
4097 sorry ("-mno-fentry isn%'t compatible with SEH");
4098 flag_fentry = 1;
4100 else if (flag_fentry < 0)
4102 #if defined(PROFILE_BEFORE_PROLOGUE)
4103 flag_fentry = 1;
4104 #else
4105 flag_fentry = 0;
4106 #endif
4109 /* Save the initial options in case the user uses function-specific options. */
4110 if (main_args_p)
4111 target_option_default_node = target_option_current_node
4112 = build_target_option_node ();
4114 if (TARGET_AVX)
4116 /* When not optimizing for size, enable the vzeroupper optimization for
4117 TARGET_AVX with -fexpensive-optimizations. */
4118 if (!optimize_size
4119 && flag_expensive_optimizations
4120 && !(target_flags_explicit & MASK_VZEROUPPER))
4121 target_flags |= MASK_VZEROUPPER;
4123 else
4125 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4126 target_flags &= ~MASK_VZEROUPPER;
4130 /* Return TRUE if VAL is passed in a register with a 256bit AVX mode. */
4132 static bool
4133 function_pass_avx256_p (const_rtx val)
4135 if (!val)
4136 return false;
4138 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
4139 return true;
4141 if (GET_CODE (val) == PARALLEL)
4143 int i;
4144 rtx r;
4146 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
4148 r = XVECEXP (val, 0, i);
4149 if (GET_CODE (r) == EXPR_LIST
4150 && XEXP (r, 0)
4151 && REG_P (XEXP (r, 0))
4152 && (GET_MODE (XEXP (r, 0)) == OImode
4153 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
4154 return true;
4158 return false;
4161 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4163 static void
4164 ix86_option_override (void)
4166 ix86_option_override_internal (true);
4169 /* Update register usage after having seen the compiler flags. */
4171 static void
4172 ix86_conditional_register_usage (void)
4174 int i;
4175 unsigned int j;
4177 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4179 if (fixed_regs[i] > 1)
4180 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
4181 if (call_used_regs[i] > 1)
4182 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
4185 /* The PIC register, if it exists, is fixed. */
4186 j = PIC_OFFSET_TABLE_REGNUM;
4187 if (j != INVALID_REGNUM)
4188 fixed_regs[j] = call_used_regs[j] = 1;
4190 /* The MS_ABI changes the set of call-used registers. */
4191 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
4193 call_used_regs[SI_REG] = 0;
4194 call_used_regs[DI_REG] = 0;
4195 call_used_regs[XMM6_REG] = 0;
4196 call_used_regs[XMM7_REG] = 0;
4197 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4198 call_used_regs[i] = 0;
4201 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4202 other call-clobbered regs for 64-bit. */
4203 if (TARGET_64BIT)
4205 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4207 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4208 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4209 && call_used_regs[i])
4210 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4213 /* If MMX is disabled, squash the registers. */
4214 if (! TARGET_MMX)
4215 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4216 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4217 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4219 /* If SSE is disabled, squash the registers. */
4220 if (! TARGET_SSE)
4221 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4222 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4223 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4225 /* If the FPU is disabled, squash the registers. */
4226 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4227 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4228 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4229 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4231 /* If 32-bit, squash the 64-bit registers. */
4232 if (! TARGET_64BIT)
4234 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4235 reg_names[i] = "";
4236 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4237 reg_names[i] = "";
4242 /* Save the current options. */
4244 static void
4245 ix86_function_specific_save (struct cl_target_option *ptr)
4247 ptr->arch = ix86_arch;
4248 ptr->schedule = ix86_schedule;
4249 ptr->tune = ix86_tune;
4250 ptr->fpmath = ix86_fpmath;
4251 ptr->branch_cost = ix86_branch_cost;
4252 ptr->tune_defaulted = ix86_tune_defaulted;
4253 ptr->arch_specified = ix86_arch_specified;
4254 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4255 ptr->ix86_target_flags_explicit = target_flags_explicit;
4257 /* The fields are char but the variables are not; make sure the
4258 values fit in the fields. */
4259 gcc_assert (ptr->arch == ix86_arch);
4260 gcc_assert (ptr->schedule == ix86_schedule);
4261 gcc_assert (ptr->tune == ix86_tune);
4262 gcc_assert (ptr->fpmath == ix86_fpmath);
4263 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4266 /* Restore the current options. */
4268 static void
4269 ix86_function_specific_restore (struct cl_target_option *ptr)
4271 enum processor_type old_tune = ix86_tune;
4272 enum processor_type old_arch = ix86_arch;
4273 unsigned int ix86_arch_mask, ix86_tune_mask;
4274 int i;
4276 ix86_arch = (enum processor_type) ptr->arch;
4277 ix86_schedule = (enum attr_cpu) ptr->schedule;
4278 ix86_tune = (enum processor_type) ptr->tune;
4279 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
4280 ix86_branch_cost = ptr->branch_cost;
4281 ix86_tune_defaulted = ptr->tune_defaulted;
4282 ix86_arch_specified = ptr->arch_specified;
4283 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
4284 target_flags_explicit = ptr->ix86_target_flags_explicit;
4286 /* Recreate the arch feature tests if the arch changed. */
4287 if (old_arch != ix86_arch)
4289 ix86_arch_mask = 1u << ix86_arch;
4290 for (i = 0; i < X86_ARCH_LAST; ++i)
4291 ix86_arch_features[i]
4292 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4295 /* Recreate the tune optimization tests. */
4296 if (old_tune != ix86_tune)
4298 ix86_tune_mask = 1u << ix86_tune;
4299 for (i = 0; i < X86_TUNE_LAST; ++i)
4300 ix86_tune_features[i]
4301 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4305 /* Print the current options. */
4307 static void
4308 ix86_function_specific_print (FILE *file, int indent,
4309 struct cl_target_option *ptr)
4311 char *target_string
4312 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4313 NULL, NULL, NULL, false);
4315 fprintf (file, "%*sarch = %d (%s)\n",
4316 indent, "",
4317 ptr->arch,
4318 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4319 ? cpu_names[ptr->arch]
4320 : "<unknown>"));
4322 fprintf (file, "%*stune = %d (%s)\n",
4323 indent, "",
4324 ptr->tune,
4325 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4326 ? cpu_names[ptr->tune]
4327 : "<unknown>"));
4329 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
4330 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
4331 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
4332 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4334 if (target_string)
4336 fprintf (file, "%*s%s\n", indent, "", target_string);
4337 free (target_string);
4342 /* Inner function to process the attribute((target(...))), take an argument and
4343 set the current options from the argument. If we have a list, recursively go
4344 over the list. */
4346 static bool
4347 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
4349 char *next_optstr;
4350 bool ret = true;
4352 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4353 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4354 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4355 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4357 enum ix86_opt_type
4359 ix86_opt_unknown,
4360 ix86_opt_yes,
4361 ix86_opt_no,
4362 ix86_opt_str,
4363 ix86_opt_isa
4366 static const struct
4368 const char *string;
4369 size_t len;
4370 enum ix86_opt_type type;
4371 int opt;
4372 int mask;
4373 } attrs[] = {
4374 /* isa options */
4375 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4376 IX86_ATTR_ISA ("abm", OPT_mabm),
4377 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4378 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4379 IX86_ATTR_ISA ("aes", OPT_maes),
4380 IX86_ATTR_ISA ("avx", OPT_mavx),
4381 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4382 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4383 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4384 IX86_ATTR_ISA ("sse", OPT_msse),
4385 IX86_ATTR_ISA ("sse2", OPT_msse2),
4386 IX86_ATTR_ISA ("sse3", OPT_msse3),
4387 IX86_ATTR_ISA ("sse4", OPT_msse4),
4388 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4389 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4390 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4391 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4392 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4393 IX86_ATTR_ISA ("xop", OPT_mxop),
4394 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4395 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4396 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4397 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4399 /* string options */
4400 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4401 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
4402 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4404 /* flag options */
4405 IX86_ATTR_YES ("cld",
4406 OPT_mcld,
4407 MASK_CLD),
4409 IX86_ATTR_NO ("fancy-math-387",
4410 OPT_mfancy_math_387,
4411 MASK_NO_FANCY_MATH_387),
4413 IX86_ATTR_YES ("ieee-fp",
4414 OPT_mieee_fp,
4415 MASK_IEEE_FP),
4417 IX86_ATTR_YES ("inline-all-stringops",
4418 OPT_minline_all_stringops,
4419 MASK_INLINE_ALL_STRINGOPS),
4421 IX86_ATTR_YES ("inline-stringops-dynamically",
4422 OPT_minline_stringops_dynamically,
4423 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4425 IX86_ATTR_NO ("align-stringops",
4426 OPT_mno_align_stringops,
4427 MASK_NO_ALIGN_STRINGOPS),
4429 IX86_ATTR_YES ("recip",
4430 OPT_mrecip,
4431 MASK_RECIP),
4435 /* If this is a list, recurse to get the options. */
4436 if (TREE_CODE (args) == TREE_LIST)
4438 bool ret = true;
4440 for (; args; args = TREE_CHAIN (args))
4441 if (TREE_VALUE (args)
4442 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
4443 ret = false;
4445 return ret;
4448 else if (TREE_CODE (args) != STRING_CST)
4449 gcc_unreachable ();
4451 /* Handle multiple arguments separated by commas. */
4452 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4454 while (next_optstr && *next_optstr != '\0')
4456 char *p = next_optstr;
4457 char *orig_p = p;
4458 char *comma = strchr (next_optstr, ',');
4459 const char *opt_string;
4460 size_t len, opt_len;
4461 int opt;
4462 bool opt_set_p;
4463 char ch;
4464 unsigned i;
4465 enum ix86_opt_type type = ix86_opt_unknown;
4466 int mask = 0;
4468 if (comma)
4470 *comma = '\0';
4471 len = comma - next_optstr;
4472 next_optstr = comma + 1;
4474 else
4476 len = strlen (p);
4477 next_optstr = NULL;
4480 /* Recognize no-xxx. */
4481 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4483 opt_set_p = false;
4484 p += 3;
4485 len -= 3;
4487 else
4488 opt_set_p = true;
4490 /* Find the option. */
4491 ch = *p;
4492 opt = N_OPTS;
4493 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4495 type = attrs[i].type;
4496 opt_len = attrs[i].len;
4497 if (ch == attrs[i].string[0]
4498 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4499 && memcmp (p, attrs[i].string, opt_len) == 0)
4501 opt = attrs[i].opt;
4502 mask = attrs[i].mask;
4503 opt_string = attrs[i].string;
4504 break;
4508 /* Process the option. */
4509 if (opt == N_OPTS)
4511 error ("attribute(target(\"%s\")) is unknown", orig_p);
4512 ret = false;
4515 else if (type == ix86_opt_isa)
4516 ix86_handle_option (opt, p, opt_set_p);
4518 else if (type == ix86_opt_yes || type == ix86_opt_no)
4520 if (type == ix86_opt_no)
4521 opt_set_p = !opt_set_p;
4523 if (opt_set_p)
4524 target_flags |= mask;
4525 else
4526 target_flags &= ~mask;
4529 else if (type == ix86_opt_str)
4531 if (p_strings[opt])
4533 error ("option(\"%s\") was already specified", opt_string);
4534 ret = false;
4536 else
4537 p_strings[opt] = xstrdup (p + opt_len);
4540 else
4541 gcc_unreachable ();
4544 return ret;
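/* Illustrative example of the parsing above (assumed user code, not required
   by it): given __attribute__((target ("sse4.1,no-avx,arch=core2"))), the
   string is split at the commas, the "no-" prefix on "no-avx" flips opt_set_p
   so the AVX ISA bit is cleared rather than set, "sse4.1" and "avx" are
   ix86_opt_isa entries dispatched through ix86_handle_option, and the "core2"
   value of the ix86_opt_str entry "arch=" is saved in p_strings[] for the
   caller to apply later.  */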
4547 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4549 tree
4550 ix86_valid_target_attribute_tree (tree args)
4552 const char *orig_arch_string = ix86_arch_string;
4553 const char *orig_tune_string = ix86_tune_string;
4554 const char *orig_fpmath_string = ix86_fpmath_string;
4555 int orig_tune_defaulted = ix86_tune_defaulted;
4556 int orig_arch_specified = ix86_arch_specified;
4557 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4558 tree t = NULL_TREE;
4559 int i;
4560 struct cl_target_option *def
4561 = TREE_TARGET_OPTION (target_option_default_node);
4563 /* Process each of the options on the chain. */
4564 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4565 return NULL_TREE;
4567 /* If the changed options are different from the default, rerun
4568 ix86_option_override_internal, and then save the options away.
4569 The string options are attribute options, and will be undone
4570 when we copy the save structure. */
4571 if (ix86_isa_flags != def->x_ix86_isa_flags
4572 || target_flags != def->x_target_flags
4573 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4574 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4575 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4577 /* If we are using the default tune= or arch=, undo the string assigned,
4578 and use the default. */
4579 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4580 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4581 else if (!orig_arch_specified)
4582 ix86_arch_string = NULL;
4584 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4585 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4586 else if (orig_tune_defaulted)
4587 ix86_tune_string = NULL;
4589 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4590 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4591 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4592 else if (!TARGET_64BIT && TARGET_SSE)
4593 ix86_fpmath_string = "sse,387";
4595 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4596 ix86_option_override_internal (false);
4598 /* Add any builtin functions with the new isa if any. */
4599 ix86_add_new_builtins (ix86_isa_flags);
4601 /* Save the current options unless we are validating options for
4602 #pragma. */
4603 t = build_target_option_node ();
4605 ix86_arch_string = orig_arch_string;
4606 ix86_tune_string = orig_tune_string;
4607 ix86_fpmath_string = orig_fpmath_string;
4609 /* Free up the memory allocated to hold the strings. */
4610 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4611 if (option_strings[i])
4612 free (option_strings[i]);
4615 return t;
4618 /* Hook to validate attribute((target("string"))). */
4620 static bool
4621 ix86_valid_target_attribute_p (tree fndecl,
4622 tree ARG_UNUSED (name),
4623 tree args,
4624 int ARG_UNUSED (flags))
4626 struct cl_target_option cur_target;
4627 bool ret = true;
4628 tree old_optimize = build_optimization_node ();
4629 tree new_target, new_optimize;
4630 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4632 /* If the function changed the optimization levels as well as setting target
4633 options, start with the optimizations specified. */
4634 if (func_optimize && func_optimize != old_optimize)
4635 cl_optimization_restore (&global_options,
4636 TREE_OPTIMIZATION (func_optimize));
4638 /* The target attributes may also change some optimization flags, so update
4639 the optimization options if necessary. */
4640 cl_target_option_save (&cur_target, &global_options);
4641 new_target = ix86_valid_target_attribute_tree (args);
4642 new_optimize = build_optimization_node ();
4644 if (!new_target)
4645 ret = false;
4647 else if (fndecl)
4649 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4651 if (old_optimize != new_optimize)
4652 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4655 cl_target_option_restore (&global_options, &cur_target);
4657 if (old_optimize != new_optimize)
4658 cl_optimization_restore (&global_options,
4659 TREE_OPTIMIZATION (old_optimize));
4661 return ret;
4665 /* Hook to determine if one function can safely inline another. */
4667 static bool
4668 ix86_can_inline_p (tree caller, tree callee)
4670 bool ret = false;
4671 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4672 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4674 /* If callee has no option attributes, then it is ok to inline. */
4675 if (!callee_tree)
4676 ret = true;
4678 /* If caller has no option attributes, but callee does then it is not ok to
4679 inline. */
4680 else if (!caller_tree)
4681 ret = false;
4683 else
4685 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4686 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4688 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
4689 function can inline an SSE2 function but an SSE2 function can't inline
4690 an SSE4 function. */
4691 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4692 != callee_opts->x_ix86_isa_flags)
4693 ret = false;
4695 /* See if we have the same non-isa options. */
4696 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4697 ret = false;
4699 /* See if arch, tune, etc. are the same. */
4700 else if (caller_opts->arch != callee_opts->arch)
4701 ret = false;
4703 else if (caller_opts->tune != callee_opts->tune)
4704 ret = false;
4706 else if (caller_opts->fpmath != callee_opts->fpmath)
4707 ret = false;
4709 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4710 ret = false;
4712 else
4713 ret = true;
4716 return ret;
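/* Illustration of the subset rule above: since the test is effectively
   (caller_isa & callee_isa) == callee_isa, a caller compiled with
   target("sse4.2") may inline a callee restricted to target("sse2"),
   while inlining in the other direction is rejected because the callee
   uses ISA bits the caller does not guarantee.  */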
4720 /* Remember the last target of ix86_set_current_function. */
4721 static GTY(()) tree ix86_previous_fndecl;
4723 /* Establish appropriate back-end context for processing the function
4724 FNDECL. The argument might be NULL to indicate processing at top
4725 level, outside of any function scope. */
4726 static void
4727 ix86_set_current_function (tree fndecl)
4729 /* Only change the context if the function changes. This hook is called
4730 several times in the course of compiling a function, and we don't want to
4731 slow things down too much or call target_reinit when it isn't safe. */
4732 if (fndecl && fndecl != ix86_previous_fndecl)
4734 tree old_tree = (ix86_previous_fndecl
4735 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4736 : NULL_TREE);
4738 tree new_tree = (fndecl
4739 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4740 : NULL_TREE);
4742 ix86_previous_fndecl = fndecl;
4743 if (old_tree == new_tree)
4746 else if (new_tree)
4748 cl_target_option_restore (&global_options,
4749 TREE_TARGET_OPTION (new_tree));
4750 target_reinit ();
4753 else if (old_tree)
4755 struct cl_target_option *def
4756 = TREE_TARGET_OPTION (target_option_current_node);
4758 cl_target_option_restore (&global_options, def);
4759 target_reinit ();
4765 /* Return true if this goes in large data/bss. */
4767 static bool
4768 ix86_in_large_data_p (tree exp)
4770 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4771 return false;
4773 /* Functions are never large data. */
4774 if (TREE_CODE (exp) == FUNCTION_DECL)
4775 return false;
4777 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4779 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4780 if (strcmp (section, ".ldata") == 0
4781 || strcmp (section, ".lbss") == 0)
4782 return true;
4783 return false;
4785 else
4787 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4789 /* If this is an incomplete type with size 0, then we can't put it
4790 in data because it might be too big when completed. */
4791 if (!size || size > ix86_section_threshold)
4792 return true;
4795 return false;
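/* Illustration: under -mcmodel=medium with the default large-data threshold
   (65536 bytes at this point, set elsewhere via -mlarge-data-threshold), a
   100000-byte array is treated as large data by the size test above, a
   4-byte int is not, and anything explicitly placed in ".ldata" or ".lbss"
   qualifies regardless of size.  */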
4798 /* Switch to the appropriate section for output of DECL.
4799 DECL is either a `VAR_DECL' node or a constant of some sort.
4800 RELOC indicates whether forming the initial value of DECL requires
4801 link-time relocations. */
4803 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4804 ATTRIBUTE_UNUSED;
4806 static section *
4807 x86_64_elf_select_section (tree decl, int reloc,
4808 unsigned HOST_WIDE_INT align)
4810 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4811 && ix86_in_large_data_p (decl))
4813 const char *sname = NULL;
4814 unsigned int flags = SECTION_WRITE;
4815 switch (categorize_decl_for_section (decl, reloc))
4817 case SECCAT_DATA:
4818 sname = ".ldata";
4819 break;
4820 case SECCAT_DATA_REL:
4821 sname = ".ldata.rel";
4822 break;
4823 case SECCAT_DATA_REL_LOCAL:
4824 sname = ".ldata.rel.local";
4825 break;
4826 case SECCAT_DATA_REL_RO:
4827 sname = ".ldata.rel.ro";
4828 break;
4829 case SECCAT_DATA_REL_RO_LOCAL:
4830 sname = ".ldata.rel.ro.local";
4831 break;
4832 case SECCAT_BSS:
4833 sname = ".lbss";
4834 flags |= SECTION_BSS;
4835 break;
4836 case SECCAT_RODATA:
4837 case SECCAT_RODATA_MERGE_STR:
4838 case SECCAT_RODATA_MERGE_STR_INIT:
4839 case SECCAT_RODATA_MERGE_CONST:
4840 sname = ".lrodata";
4841 flags = 0;
4842 break;
4843 case SECCAT_SRODATA:
4844 case SECCAT_SDATA:
4845 case SECCAT_SBSS:
4846 gcc_unreachable ();
4847 case SECCAT_TEXT:
4848 case SECCAT_TDATA:
4849 case SECCAT_TBSS:
4850 /* We don't split these for the medium model. Place them into
4851 default sections and hope for the best. */
4852 break;
4854 if (sname)
4856 /* We might get called with string constants, but get_named_section
4857 doesn't like them as they are not DECLs. Also, we need to set
4858 flags in that case. */
4859 if (!DECL_P (decl))
4860 return get_section (sname, flags, NULL);
4861 return get_named_section (decl, sname, reloc);
4864 return default_elf_select_section (decl, reloc, align);
4867 /* Build up a unique section name, expressed as a
4868 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4869 RELOC indicates whether the initial value of DECL requires
4870 link-time relocations. */
4872 static void ATTRIBUTE_UNUSED
4873 x86_64_elf_unique_section (tree decl, int reloc)
4875 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4876 && ix86_in_large_data_p (decl))
4878 const char *prefix = NULL;
4879 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4880 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4882 switch (categorize_decl_for_section (decl, reloc))
4884 case SECCAT_DATA:
4885 case SECCAT_DATA_REL:
4886 case SECCAT_DATA_REL_LOCAL:
4887 case SECCAT_DATA_REL_RO:
4888 case SECCAT_DATA_REL_RO_LOCAL:
4889 prefix = one_only ? ".ld" : ".ldata";
4890 break;
4891 case SECCAT_BSS:
4892 prefix = one_only ? ".lb" : ".lbss";
4893 break;
4894 case SECCAT_RODATA:
4895 case SECCAT_RODATA_MERGE_STR:
4896 case SECCAT_RODATA_MERGE_STR_INIT:
4897 case SECCAT_RODATA_MERGE_CONST:
4898 prefix = one_only ? ".lr" : ".lrodata";
4899 break;
4900 case SECCAT_SRODATA:
4901 case SECCAT_SDATA:
4902 case SECCAT_SBSS:
4903 gcc_unreachable ();
4904 case SECCAT_TEXT:
4905 case SECCAT_TDATA:
4906 case SECCAT_TBSS:
4907 /* We don't split these for the medium model. Place them into
4908 default sections and hope for the best. */
4909 break;
4911 if (prefix)
4913 const char *name, *linkonce;
4914 char *string;
4916 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4917 name = targetm.strip_name_encoding (name);
4919 /* If we're using one_only, then there needs to be a .gnu.linkonce
4920 prefix to the section name. */
4921 linkonce = one_only ? ".gnu.linkonce" : "";
4923 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4925 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4926 return;
4929 default_unique_section (decl, reloc);
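/* Illustration: for a large-data BSS variable "foo" the code above produces
   the section name ".lbss.foo", or ".gnu.linkonce.lb.foo" when one_only is
   in effect, by concatenating the optional linkonce prefix, the category
   prefix and the stripped assembler name.  */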
4932 #ifdef COMMON_ASM_OP
4933 /* This says how to output assembler code to declare an
4934 uninitialized external linkage data object.
4936 For medium-model x86-64 we need to use the .largecomm directive for
4937 large objects. */
4938 void
4939 x86_elf_aligned_common (FILE *file,
4940 const char *name, unsigned HOST_WIDE_INT size,
4941 int align)
4943 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4944 && size > (unsigned int)ix86_section_threshold)
4945 fputs (".largecomm\t", file);
4946 else
4947 fputs (COMMON_ASM_OP, file);
4948 assemble_name (file, name);
4949 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4950 size, align / BITS_PER_UNIT);
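/* Illustration: for a 100000-byte common object "buf" with 256-bit (32-byte)
   alignment under -mcmodel=medium, the routine above emits
       .largecomm	buf,100000,32
   whereas smaller objects keep the ordinary COMMON_ASM_OP (".comm") form.  */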
4952 #endif
4954 /* Utility function for targets to use in implementing
4955 ASM_OUTPUT_ALIGNED_BSS. */
4957 void
4958 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4959 const char *name, unsigned HOST_WIDE_INT size,
4960 int align)
4962 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4963 && size > (unsigned int)ix86_section_threshold)
4964 switch_to_section (get_named_section (decl, ".lbss", 0));
4965 else
4966 switch_to_section (bss_section);
4967 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4968 #ifdef ASM_DECLARE_OBJECT_NAME
4969 last_assemble_variable_decl = decl;
4970 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4971 #else
4972 /* The standard thing is to just output a label for the object. */
4973 ASM_OUTPUT_LABEL (file, name);
4974 #endif /* ASM_DECLARE_OBJECT_NAME */
4975 ASM_OUTPUT_SKIP (file, size ? size : 1);
4978 static const struct default_options ix86_option_optimization_table[] =
4980 /* Turn off -fschedule-insns by default. It tends to make the
4981 register-pressure problem even worse. */
4982 #ifdef INSN_SCHEDULING
4983 { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
4984 #endif
4986 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4987 SUBTARGET_OPTIMIZATION_OPTIONS,
4988 #endif
4989 { OPT_LEVELS_NONE, 0, NULL, 0 }
4992 /* Implement TARGET_OPTION_INIT_STRUCT. */
4994 static void
4995 ix86_option_init_struct (struct gcc_options *opts)
4997 if (TARGET_MACHO)
4998 /* The Darwin libraries never set errno, so we might as well
4999 avoid calling them when that's the only reason we would. */
5000 opts->x_flag_errno_math = 0;
5002 opts->x_flag_pcc_struct_return = 2;
5003 opts->x_flag_asynchronous_unwind_tables = 2;
5004 opts->x_flag_vect_cost_model = 1;
5007 /* Decide whether we must probe the stack before any space allocation
5008 on this target. It's essentially TARGET_STACK_PROBE except when
5009 -fstack-check causes the stack to be already probed differently. */
5011 bool
5012 ix86_target_stack_probe (void)
5014 /* Do not probe the stack twice if static stack checking is enabled. */
5015 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5016 return false;
5018 return TARGET_STACK_PROBE;
5021 /* Decide whether we can make a sibling call to a function. DECL is the
5022 declaration of the function being targeted by the call and EXP is the
5023 CALL_EXPR representing the call. */
5025 static bool
5026 ix86_function_ok_for_sibcall (tree decl, tree exp)
5028 tree type, decl_or_type;
5029 rtx a, b;
5031 /* If we are generating position-independent code, we cannot sibcall
5032 optimize any indirect call, or a direct call to a global function,
5033 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5034 if (!TARGET_MACHO
5035 && !TARGET_64BIT
5036 && flag_pic
5037 && (!decl || !targetm.binds_local_p (decl)))
5038 return false;
5040 /* If we need to align the outgoing stack, then sibcalling would
5041 unalign the stack, which may break the called function. */
5042 if (ix86_minimum_incoming_stack_boundary (true)
5043 < PREFERRED_STACK_BOUNDARY)
5044 return false;
5046 if (decl)
5048 decl_or_type = decl;
5049 type = TREE_TYPE (decl);
5051 else
5053 /* We're looking at the CALL_EXPR, we need the type of the function. */
5054 type = CALL_EXPR_FN (exp); /* pointer expression */
5055 type = TREE_TYPE (type); /* pointer type */
5056 type = TREE_TYPE (type); /* function type */
5057 decl_or_type = type;
5060 /* Check that the return value locations are the same. Like
5061 if we are returning floats on the 80387 register stack, we cannot
5062 make a sibcall from a function that doesn't return a float to a
5063 function that does or, conversely, from a function that does return
5064 a float to a function that doesn't; the necessary stack adjustment
5065 would not be executed. This is also the place we notice
5066 differences in the return value ABI. Note that it is ok for one
5067 of the functions to have void return type as long as the return
5068 value of the other is passed in a register. */
5069 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5070 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5071 cfun->decl, false);
5072 if (STACK_REG_P (a) || STACK_REG_P (b))
5074 if (!rtx_equal_p (a, b))
5075 return false;
5077 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5079 /* Disable sibcall if we need to generate vzeroupper after
5080 callee returns. */
5081 if (TARGET_VZEROUPPER
5082 && cfun->machine->callee_return_avx256_p
5083 && !cfun->machine->caller_return_avx256_p)
5084 return false;
5086 else if (!rtx_equal_p (a, b))
5087 return false;
5089 if (TARGET_64BIT)
5091 /* The SYSV ABI has more call-clobbered registers;
5092 disallow sibcalls from MS to SYSV. */
5093 if (cfun->machine->call_abi == MS_ABI
5094 && ix86_function_type_abi (type) == SYSV_ABI)
5095 return false;
5097 else
5099 /* If this call is indirect, we'll need to be able to use a
5100 call-clobbered register for the address of the target function.
5101 Make sure that all such registers are not used for passing
5102 parameters. Note that DLLIMPORT functions are indirect. */
5103 if (!decl
5104 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5106 if (ix86_function_regparm (type, NULL) >= 3)
5108 /* ??? Need to count the actual number of registers to be used,
5109 not the possible number of registers. Fix later. */
5110 return false;
5115 /* Otherwise okay. That also includes certain types of indirect calls. */
5116 return true;
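/* Illustrative sketch, not part of GCC: the 80387 return-stack case the
   comment above describes.  "produce" is a hypothetical external function.
   A tail call from a non-float-returning caller to a float-returning callee
   would leave a value on %st(0) that the caller never pops, so the check
   above refuses to turn it into a sibcall on ia32.  */
extern double produce (void);

void
consume (void)
{
  produce ();   /* call in tail position, but kept as an ordinary call */
}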
5119 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5120 and "sseregparm" calling convention attributes;
5121 arguments as in struct attribute_spec.handler. */
5123 static tree
5124 ix86_handle_cconv_attribute (tree *node, tree name,
5125 tree args,
5126 int flags ATTRIBUTE_UNUSED,
5127 bool *no_add_attrs)
5129 if (TREE_CODE (*node) != FUNCTION_TYPE
5130 && TREE_CODE (*node) != METHOD_TYPE
5131 && TREE_CODE (*node) != FIELD_DECL
5132 && TREE_CODE (*node) != TYPE_DECL)
5134 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5135 name);
5136 *no_add_attrs = true;
5137 return NULL_TREE;
5140 /* Can combine regparm with all attributes but fastcall. */
5141 if (is_attribute_p ("regparm", name))
5143 tree cst;
5145 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5147 error ("fastcall and regparm attributes are not compatible");
5150 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5152 error ("regparam and thiscall attributes are not compatible");
5155 cst = TREE_VALUE (args);
5156 if (TREE_CODE (cst) != INTEGER_CST)
5158 warning (OPT_Wattributes,
5159 "%qE attribute requires an integer constant argument",
5160 name);
5161 *no_add_attrs = true;
5163 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5165 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5166 name, REGPARM_MAX);
5167 *no_add_attrs = true;
5170 return NULL_TREE;
5173 if (TARGET_64BIT)
5175 /* Do not warn when emulating the MS ABI. */
5176 if ((TREE_CODE (*node) != FUNCTION_TYPE
5177 && TREE_CODE (*node) != METHOD_TYPE)
5178 || ix86_function_type_abi (*node) != MS_ABI)
5179 warning (OPT_Wattributes, "%qE attribute ignored",
5180 name);
5181 *no_add_attrs = true;
5182 return NULL_TREE;
5185 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5186 if (is_attribute_p ("fastcall", name))
5188 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5190 error ("fastcall and cdecl attributes are not compatible");
5192 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5194 error ("fastcall and stdcall attributes are not compatible");
5196 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5198 error ("fastcall and regparm attributes are not compatible");
5200 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5202 error ("fastcall and thiscall attributes are not compatible");
5206 /* Can combine stdcall with fastcall (redundant), regparm and
5207 sseregparm. */
5208 else if (is_attribute_p ("stdcall", name))
5210 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5212 error ("stdcall and cdecl attributes are not compatible");
5214 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5216 error ("stdcall and fastcall attributes are not compatible");
5218 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5220 error ("stdcall and thiscall attributes are not compatible");
5224 /* Can combine cdecl with regparm and sseregparm. */
5225 else if (is_attribute_p ("cdecl", name))
5227 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5229 error ("stdcall and cdecl attributes are not compatible");
5231 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5233 error ("fastcall and cdecl attributes are not compatible");
5235 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5237 error ("cdecl and thiscall attributes are not compatible");
5240 else if (is_attribute_p ("thiscall", name))
5242 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5243 warning (OPT_Wattributes, "%qE attribute is used for none class-method",
5244 name);
5245 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5247 error ("stdcall and thiscall attributes are not compatible");
5249 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5251 error ("fastcall and thiscall attributes are not compatible");
5253 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5255 error ("cdecl and thiscall attributes are not compatible");
5259 /* Can combine sseregparm with all attributes. */
5261 return NULL_TREE;
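/* Illustrative, not part of GCC: source-level view of the checks above on a
   32-bit target.  The first declaration is accepted; the second hits the
   "fastcall and regparm attributes are not compatible" error path.  */
extern void __attribute__ ((regparm (3))) ok_fn (int, int, int);
extern void __attribute__ ((fastcall, regparm (2))) bad_fn (int, int);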
5264 /* Return 0 if the attributes for two types are incompatible, 1 if they
5265 are compatible, and 2 if they are nearly compatible (which causes a
5266 warning to be generated). */
5268 static int
5269 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5271 /* Check for mismatch of non-default calling convention. */
5272 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
5274 if (TREE_CODE (type1) != FUNCTION_TYPE
5275 && TREE_CODE (type1) != METHOD_TYPE)
5276 return 1;
5278 /* Check for mismatched fastcall/regparm types. */
5279 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
5280 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
5281 || (ix86_function_regparm (type1, NULL)
5282 != ix86_function_regparm (type2, NULL)))
5283 return 0;
5285 /* Check for mismatched sseregparm types. */
5286 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
5287 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
5288 return 0;
5290 /* Check for mismatched thiscall types. */
5291 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
5292 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
5293 return 0;
5295 /* Check for mismatched return types (cdecl vs stdcall). */
5296 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
5297 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
5298 return 0;
5300 return 1;
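/* Illustrative, not part of GCC: when ix86_comp_type_attributes returns 0,
   the two pointer types below are treated as incompatible, so this -m32
   initialization draws an "incompatible pointer type" diagnostic.  */
extern void __attribute__ ((stdcall)) callee_std (int);

void (*plain_ptr) (int) = callee_std;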
5303 /* Return the regparm value for a function with the indicated TYPE and DECL.
5304 DECL may be NULL when calling function indirectly
5305 or considering a libcall. */
5307 static int
5308 ix86_function_regparm (const_tree type, const_tree decl)
5310 tree attr;
5311 int regparm;
5313 if (TARGET_64BIT)
5314 return (ix86_function_type_abi (type) == SYSV_ABI
5315 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5317 regparm = ix86_regparm;
5318 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5319 if (attr)
5321 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5322 return regparm;
5325 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
5326 return 2;
5328 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
5329 return 1;
5331 /* Use register calling convention for local functions when possible. */
5332 if (decl
5333 && TREE_CODE (decl) == FUNCTION_DECL
5334 && optimize
5335 && !(profile_flag && !flag_fentry))
5337 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5338 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5339 if (i && i->local)
5341 int local_regparm, globals = 0, regno;
5343 /* Make sure no regparm register is taken by a
5344 fixed register variable. */
5345 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5346 if (fixed_regs[local_regparm])
5347 break;
5349 /* We don't want to use regparm(3) for nested functions as
5350 these use a static chain pointer in the third argument. */
5351 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5352 local_regparm = 2;
5354 /* In 32-bit mode save a register for the split stack. */
5355 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5356 local_regparm = 2;
5358 /* Each fixed register usage increases register pressure,
5359 so fewer registers should be used for argument passing.
5360 This functionality can be overridden by an explicit
5361 regparm value. */
5362 for (regno = 0; regno <= DI_REG; regno++)
5363 if (fixed_regs[regno])
5364 globals++;
5366 local_regparm
5367 = globals < local_regparm ? local_regparm - globals : 0;
5369 if (local_regparm > regparm)
5370 regparm = local_regparm;
5374 return regparm;
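/* Illustrative, not part of GCC: the explicit form of what the local-call
   heuristic above applies automatically to static functions at -O on ia32.
   REGPARM_MAX is 3, so all three arguments arrive in registers.  */
static int __attribute__ ((regparm (3)))
add3 (int a, int b, int c)    /* a in %eax, b in %edx, c in %ecx */
{
  return a + b + c;
}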
5377 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5378 DFmode (2) arguments in SSE registers for a function with the
5379 indicated TYPE and DECL. DECL may be NULL when calling function
5380 indirectly or considering a libcall. Otherwise return 0. */
5382 static int
5383 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5385 gcc_assert (!TARGET_64BIT);
5387 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5388 by the sseregparm attribute. */
5389 if (TARGET_SSEREGPARM
5390 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5392 if (!TARGET_SSE)
5394 if (warn)
5396 if (decl)
5397 error ("calling %qD with attribute sseregparm without "
5398 "SSE/SSE2 enabled", decl);
5399 else
5400 error ("calling %qT with attribute sseregparm without "
5401 "SSE/SSE2 enabled", type);
5403 return 0;
5406 return 2;
5409 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5410 (and DFmode for SSE2) arguments in SSE registers. */
5411 if (decl && TARGET_SSE_MATH && optimize
5412 && !(profile_flag && !flag_fentry))
5414 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5415 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5416 if (i && i->local)
5417 return TARGET_SSE2 ? 2 : 1;
5420 return 0;
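/* Illustrative, not part of GCC: with -m32 -msse2, the attribute below makes
   the function above return 2, so X and Y arrive in %xmm0 and %xmm1 rather
   than on the stack.  */
double __attribute__ ((sseregparm))
scale (double x, double y)
{
  return x * y;
}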
5423 /* Return true if EAX is live at the start of the function. Used by
5424 ix86_expand_prologue to determine if we need special help before
5425 calling allocate_stack_worker. */
5427 static bool
5428 ix86_eax_live_at_start_p (void)
5430 /* Cheat. Don't bother working forward from ix86_function_regparm
5431 to the function type to whether an actual argument is located in
5432 eax. Instead just look at cfg info, which is still close enough
5433 to correct at this point. This gives false positives for broken
5434 functions that might use uninitialized data that happens to be
5435 allocated in eax, but who cares? */
5436 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5439 static bool
5440 ix86_keep_aggregate_return_pointer (tree fntype)
5442 tree attr;
5444 attr = lookup_attribute ("callee_pop_aggregate_return",
5445 TYPE_ATTRIBUTES (fntype));
5446 if (attr)
5447 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5449 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5452 /* Value is the number of bytes of arguments automatically
5453 popped when returning from a subroutine call.
5454 FUNDECL is the declaration node of the function (as a tree),
5455 FUNTYPE is the data type of the function (as a tree),
5456 or for a library call it is an identifier node for the subroutine name.
5457 SIZE is the number of bytes of arguments passed on the stack.
5459 On the 80386, the RTD insn may be used to pop them if the number
5460 of args is fixed, but if the number is variable then the caller
5461 must pop them all. RTD can't be used for library calls now
5462 because the library is compiled with the Unix compiler.
5463 Use of RTD is a selectable option, since it is incompatible with
5464 standard Unix calling sequences. If the option is not selected,
5465 the caller must always pop the args.
5467 The attribute stdcall is equivalent to RTD on a per module basis. */
5469 static int
5470 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5472 int rtd;
5474 /* None of the 64-bit ABIs pop arguments. */
5475 if (TARGET_64BIT)
5476 return 0;
5478 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5480 /* Cdecl functions override -mrtd, and never pop the stack. */
5481 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
5483 /* Stdcall and fastcall functions will pop the stack if not
5484 variable args. */
5485 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5486 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5487 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5488 rtd = 1;
5490 if (rtd && ! stdarg_p (funtype))
5491 return size;
5494 /* Lose any fake structure return argument if it is passed on the stack. */
5495 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5496 && !ix86_keep_aggregate_return_pointer (funtype))
5498 int nregs = ix86_function_regparm (funtype, fundecl);
5499 if (nregs == 0)
5500 return GET_MODE_SIZE (Pmode);
5503 return 0;
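/* Illustrative, not part of GCC: for the stdcall function below, compiled
   with -m32, ix86_return_pops_args returns 8, so the callee ends in
   "ret $8" and its callers make no stack adjustment of their own.  */
int __attribute__ ((stdcall))
sum2 (int a, int b)
{
  return a + b;
}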
5506 /* Argument support functions. */
5508 /* Return true when register may be used to pass function parameters. */
5509 bool
5510 ix86_function_arg_regno_p (int regno)
5512 int i;
5513 const int *parm_regs;
5515 if (!TARGET_64BIT)
5517 if (TARGET_MACHO)
5518 return (regno < REGPARM_MAX
5519 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5520 else
5521 return (regno < REGPARM_MAX
5522 || (TARGET_MMX && MMX_REGNO_P (regno)
5523 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5524 || (TARGET_SSE && SSE_REGNO_P (regno)
5525 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5528 if (TARGET_MACHO)
5530 if (SSE_REGNO_P (regno) && TARGET_SSE)
5531 return true;
5533 else
5535 if (TARGET_SSE && SSE_REGNO_P (regno)
5536 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5537 return true;
5540 /* TODO: The function should depend on current function ABI but
5541 builtins.c would need updating then. Therefore we use the
5542 default ABI. */
5544 /* RAX is used as hidden argument to va_arg functions. */
5545 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5546 return true;
5548 if (ix86_abi == MS_ABI)
5549 parm_regs = x86_64_ms_abi_int_parameter_registers;
5550 else
5551 parm_regs = x86_64_int_parameter_registers;
5552 for (i = 0; i < (ix86_abi == MS_ABI
5553 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5554 if (regno == parm_regs[i])
5555 return true;
5556 return false;
5559 /* Return if we do not know how to pass TYPE solely in registers. */
5561 static bool
5562 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5564 if (must_pass_in_stack_var_size_or_pad (mode, type))
5565 return true;
5567 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5568 The layout_type routine is crafty and tries to trick us into passing
5569 currently unsupported vector types on the stack by using TImode. */
5570 return (!TARGET_64BIT && mode == TImode
5571 && type && TREE_CODE (type) != VECTOR_TYPE);
5574 /* Return the size, in bytes, of the area reserved for arguments passed
5575 in registers for the function represented by FNDECL, depending on the
5576 ABI format used. */
5577 int
5578 ix86_reg_parm_stack_space (const_tree fndecl)
5580 enum calling_abi call_abi = SYSV_ABI;
5581 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5582 call_abi = ix86_function_abi (fndecl);
5583 else
5584 call_abi = ix86_function_type_abi (fndecl);
5585 if (call_abi == MS_ABI)
5586 return 32;
5587 return 0;
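/* Illustrative, not part of GCC: the 32 bytes returned above for MS_ABI are
   the "home" area the caller must reserve for the four register parameters
   of any 64-bit MS-ABI call.  */
extern void __attribute__ ((ms_abi)) w64_callee (int a, int b, int c, int d);

void
w64_caller (void)
{
  w64_callee (1, 2, 3, 4);   /* args in %rcx, %rdx, %r8, %r9, plus 32-byte home space */
}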
5590 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5591 call abi used. */
5592 enum calling_abi
5593 ix86_function_type_abi (const_tree fntype)
5595 if (TARGET_64BIT && fntype != NULL)
5597 enum calling_abi abi = ix86_abi;
5598 if (abi == SYSV_ABI)
5600 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5601 abi = MS_ABI;
5603 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5604 abi = SYSV_ABI;
5605 return abi;
5607 return ix86_abi;
5610 static bool
5611 ix86_function_ms_hook_prologue (const_tree fn)
5613 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5615 if (decl_function_context (fn) != NULL_TREE)
5616 error_at (DECL_SOURCE_LOCATION (fn),
5617 "ms_hook_prologue is not compatible with nested function");
5618 else
5619 return true;
5621 return false;
5624 static enum calling_abi
5625 ix86_function_abi (const_tree fndecl)
5627 if (! fndecl)
5628 return ix86_abi;
5629 return ix86_function_type_abi (TREE_TYPE (fndecl));
5632 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5633 call abi used. */
5634 enum calling_abi
5635 ix86_cfun_abi (void)
5637 if (! cfun || ! TARGET_64BIT)
5638 return ix86_abi;
5639 return cfun->machine->call_abi;
5642 /* Write the extra assembler code needed to declare a function properly. */
5644 void
5645 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5646 tree decl)
5648 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5650 if (is_ms_hook)
5652 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5653 unsigned int filler_cc = 0xcccccccc;
5655 for (i = 0; i < filler_count; i += 4)
5656 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5659 #ifdef SUBTARGET_ASM_UNWIND_INIT
5660 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5661 #endif
5663 ASM_OUTPUT_LABEL (asm_out_file, fname);
5665 /* Output magic byte marker, if hot-patch attribute is set. */
5666 if (is_ms_hook)
5668 if (TARGET_64BIT)
5670 /* leaq [%rsp + 0], %rsp */
5671 asm_fprintf (asm_out_file, ASM_BYTE
5672 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5674 else
5676 /* movl.s %edi, %edi
5677 push %ebp
5678 movl.s %esp, %ebp */
5679 asm_fprintf (asm_out_file, ASM_BYTE
5680 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5685 /* regclass.c */
5686 extern void init_regs (void);
5688 /* Implementation of call abi switching target hook. Specific to FNDECL
5689 the specific call register sets are set. See also
5690 ix86_conditional_register_usage for more details. */
5691 void
5692 ix86_call_abi_override (const_tree fndecl)
5694 if (fndecl == NULL_TREE)
5695 cfun->machine->call_abi = ix86_abi;
5696 else
5697 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5700 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
5701 re-initialization of init_regs each time we switch function context since
5702 this is needed only during RTL expansion. */
5703 static void
5704 ix86_maybe_switch_abi (void)
5706 if (TARGET_64BIT &&
5707 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5708 reinit_regs ();
5711 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5712 for a call to a function whose data type is FNTYPE.
5713 For a library call, FNTYPE is 0. */
5715 void
5716 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5717 tree fntype, /* tree ptr for function decl */
5718 rtx libname, /* SYMBOL_REF of library name or 0 */
5719 tree fndecl,
5720 int caller)
5722 struct cgraph_local_info *i;
5723 tree fnret_type;
5725 memset (cum, 0, sizeof (*cum));
5727 /* Initialize for the current callee. */
5728 if (caller)
5730 cfun->machine->callee_pass_avx256_p = false;
5731 cfun->machine->callee_return_avx256_p = false;
5734 if (fndecl)
5736 i = cgraph_local_info (fndecl);
5737 cum->call_abi = ix86_function_abi (fndecl);
5738 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5740 else
5742 i = NULL;
5743 cum->call_abi = ix86_function_type_abi (fntype);
5744 if (fntype)
5745 fnret_type = TREE_TYPE (fntype);
5746 else
5747 fnret_type = NULL;
5750 if (TARGET_VZEROUPPER && fnret_type)
5752 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5753 false);
5754 if (function_pass_avx256_p (fnret_value))
5756 /* The return value of this function uses 256bit AVX modes. */
5757 if (caller)
5758 cfun->machine->callee_return_avx256_p = true;
5759 else
5760 cfun->machine->caller_return_avx256_p = true;
5764 cum->caller = caller;
5766 /* Set up the number of registers to use for passing arguments. */
5768 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5769 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5770 "or subtarget optimization implying it");
5771 cum->nregs = ix86_regparm;
5772 if (TARGET_64BIT)
5774 cum->nregs = (cum->call_abi == SYSV_ABI
5775 ? X86_64_REGPARM_MAX
5776 : X86_64_MS_REGPARM_MAX);
5778 if (TARGET_SSE)
5780 cum->sse_nregs = SSE_REGPARM_MAX;
5781 if (TARGET_64BIT)
5783 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5784 ? X86_64_SSE_REGPARM_MAX
5785 : X86_64_MS_SSE_REGPARM_MAX);
5788 if (TARGET_MMX)
5789 cum->mmx_nregs = MMX_REGPARM_MAX;
5790 cum->warn_avx = true;
5791 cum->warn_sse = true;
5792 cum->warn_mmx = true;
5794 /* Because the type might mismatch between caller and callee, we need to
5795 use the actual type of the function for local calls.
5796 FIXME: cgraph_analyze can be told to actually record if function uses
5797 va_start so for local functions maybe_vaarg can be made aggressive
5798 helping K&R code.
5799 FIXME: once the type system is fixed, we won't need this code anymore. */
5800 if (i && i->local)
5801 fntype = TREE_TYPE (fndecl);
5802 cum->maybe_vaarg = (fntype
5803 ? (!prototype_p (fntype) || stdarg_p (fntype))
5804 : !libname);
5806 if (!TARGET_64BIT)
5808 /* If there are variable arguments, then we won't pass anything
5809 in registers in 32-bit mode. */
5810 if (stdarg_p (fntype))
5812 cum->nregs = 0;
5813 cum->sse_nregs = 0;
5814 cum->mmx_nregs = 0;
5815 cum->warn_avx = 0;
5816 cum->warn_sse = 0;
5817 cum->warn_mmx = 0;
5818 return;
5821 /* Use ecx and edx registers if function has fastcall attribute,
5822 else look for regparm information. */
5823 if (fntype)
5825 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5827 cum->nregs = 1;
5828 cum->fastcall = 1; /* Same first register as in fastcall. */
5830 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5832 cum->nregs = 2;
5833 cum->fastcall = 1;
5835 else
5836 cum->nregs = ix86_function_regparm (fntype, fndecl);
5839 /* Set up the number of SSE registers used for passing SFmode
5840 and DFmode arguments. Warn for mismatching ABI. */
5841 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5845 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5846 But in the case of vector types, it is some vector mode.
5848 When we have only some of our vector isa extensions enabled, then there
5849 are some modes for which vector_mode_supported_p is false. For these
5850 modes, the generic vector support in gcc will choose some non-vector mode
5851 in order to implement the type. By computing the natural mode, we'll
5852 select the proper ABI location for the operand and not depend on whatever
5853 the middle-end decides to do with these vector types.
5855 The middle-end can't deal with vector types > 16 bytes. In this
5856 case, we return the original mode and warn ABI change if CUM isn't
5857 NULL. */
5859 static enum machine_mode
5860 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5862 enum machine_mode mode = TYPE_MODE (type);
5864 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5866 HOST_WIDE_INT size = int_size_in_bytes (type);
5867 if ((size == 8 || size == 16 || size == 32)
5868 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5869 && TYPE_VECTOR_SUBPARTS (type) > 1)
5871 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5873 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5874 mode = MIN_MODE_VECTOR_FLOAT;
5875 else
5876 mode = MIN_MODE_VECTOR_INT;
5878 /* Get the mode which has this inner mode and number of units. */
5879 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5880 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5881 && GET_MODE_INNER (mode) == innermode)
5883 if (size == 32 && !TARGET_AVX)
5885 static bool warnedavx;
5887 if (cum
5888 && !warnedavx
5889 && cum->warn_avx)
5891 warnedavx = true;
5892 warning (0, "AVX vector argument without AVX "
5893 "enabled changes the ABI");
5895 return TYPE_MODE (type);
5897 else
5898 return mode;
5901 gcc_unreachable ();
5905 return mode;
5908 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5909 this may not agree with the mode that the type system has chosen for the
5910 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5911 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5913 static rtx
5914 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5915 unsigned int regno)
5917 rtx tmp;
5919 if (orig_mode != BLKmode)
5920 tmp = gen_rtx_REG (orig_mode, regno);
5921 else
5923 tmp = gen_rtx_REG (mode, regno);
5924 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5925 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5928 return tmp;
5931 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5932 of this code is to classify each 8bytes of incoming argument by the register
5933 class and assign registers accordingly. */
5935 /* Return the union class of CLASS1 and CLASS2.
5936 See the x86-64 PS ABI for details. */
5938 static enum x86_64_reg_class
5939 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5941 /* Rule #1: If both classes are equal, this is the resulting class. */
5942 if (class1 == class2)
5943 return class1;
5945 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5946 the other class. */
5947 if (class1 == X86_64_NO_CLASS)
5948 return class2;
5949 if (class2 == X86_64_NO_CLASS)
5950 return class1;
5952 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5953 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5954 return X86_64_MEMORY_CLASS;
5956 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5957 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5958 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5959 return X86_64_INTEGERSI_CLASS;
5960 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5961 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5962 return X86_64_INTEGER_CLASS;
5964 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5965 MEMORY is used. */
5966 if (class1 == X86_64_X87_CLASS
5967 || class1 == X86_64_X87UP_CLASS
5968 || class1 == X86_64_COMPLEX_X87_CLASS
5969 || class2 == X86_64_X87_CLASS
5970 || class2 == X86_64_X87UP_CLASS
5971 || class2 == X86_64_COMPLEX_X87_CLASS)
5972 return X86_64_MEMORY_CLASS;
5974 /* Rule #6: Otherwise class SSE is used. */
5975 return X86_64_SSE_CLASS;
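/* Illustrative worked examples, not part of GCC, of the rules above:
     merge (INTEGER,  SSE)   -> INTEGER   (rule 4)
     merge (NO_CLASS, SSESF) -> SSESF     (rule 2)
     merge (SSE,      X87)   -> MEMORY    (rule 5)
     merge (SSEDF,    SSESF) -> SSE       (rule 6)  */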
5978 /* Classify the argument of type TYPE and mode MODE.
5979 CLASSES will be filled by the register class used to pass each word
5980 of the operand. The number of words is returned. In case the parameter
5981 should be passed in memory, 0 is returned. As a special case for zero
5982 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5984 BIT_OFFSET is used internally for handling records and specifies the
5985 offset in bits, modulo 256, to avoid overflow cases.
5987 See the x86-64 PS ABI for details.
5990 static int
5991 classify_argument (enum machine_mode mode, const_tree type,
5992 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5994 HOST_WIDE_INT bytes =
5995 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5996 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5998 /* Variable sized entities are always passed/returned in memory. */
5999 if (bytes < 0)
6000 return 0;
6002 if (mode != VOIDmode
6003 && targetm.calls.must_pass_in_stack (mode, type))
6004 return 0;
6006 /* Special case check for pointer to shared, on 64-bit target. */
6007 if (TARGET_64BIT && mode == TImode
6008 && type && TREE_CODE (type) == POINTER_TYPE
6009 && upc_shared_type_p (TREE_TYPE (type)))
6011 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6012 return 2;
6015 if (type && AGGREGATE_TYPE_P (type))
6017 int i;
6018 tree field;
6019 enum x86_64_reg_class subclasses[MAX_CLASSES];
6021 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6022 if (bytes > 32)
6023 return 0;
6025 for (i = 0; i < words; i++)
6026 classes[i] = X86_64_NO_CLASS;
6028 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6029 signal the memory class, so handle this as a special case. */
6030 if (!words)
6032 classes[0] = X86_64_NO_CLASS;
6033 return 1;
6036 /* Classify each field of record and merge classes. */
6037 switch (TREE_CODE (type))
6039 case RECORD_TYPE:
6040 /* And now merge the fields of structure. */
6041 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6043 if (TREE_CODE (field) == FIELD_DECL)
6045 int num;
6047 if (TREE_TYPE (field) == error_mark_node)
6048 continue;
6050 /* Bitfields are always classified as integer. Handle them
6051 early, since later code would consider them to be
6052 misaligned integers. */
6053 if (DECL_BIT_FIELD (field))
6055 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6056 i < ((int_bit_position (field) + (bit_offset % 64))
6057 + tree_low_cst (DECL_SIZE (field), 0)
6058 + 63) / 8 / 8; i++)
6059 classes[i] =
6060 merge_classes (X86_64_INTEGER_CLASS,
6061 classes[i]);
6063 else
6065 int pos;
6067 type = TREE_TYPE (field);
6069 /* Flexible array member is ignored. */
6070 if (TYPE_MODE (type) == BLKmode
6071 && TREE_CODE (type) == ARRAY_TYPE
6072 && TYPE_SIZE (type) == NULL_TREE
6073 && TYPE_DOMAIN (type) != NULL_TREE
6074 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6075 == NULL_TREE))
6077 static bool warned;
6079 if (!warned && warn_psabi)
6081 warned = true;
6082 inform (input_location,
6083 "the ABI of passing struct with"
6084 " a flexible array member has"
6085 " changed in GCC 4.4");
6087 continue;
6089 num = classify_argument (TYPE_MODE (type), type,
6090 subclasses,
6091 (int_bit_position (field)
6092 + bit_offset) % 256);
6093 if (!num)
6094 return 0;
6095 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6096 for (i = 0; i < num && (i + pos) < words; i++)
6097 classes[i + pos] =
6098 merge_classes (subclasses[i], classes[i + pos]);
6102 break;
6104 case ARRAY_TYPE:
6105 /* Arrays are handled as small records. */
6107 int num;
6108 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6109 TREE_TYPE (type), subclasses, bit_offset);
6110 if (!num)
6111 return 0;
6113 /* The partial classes are now full classes. */
6114 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6115 subclasses[0] = X86_64_SSE_CLASS;
6116 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6117 && !((bit_offset % 64) == 0 && bytes == 4))
6118 subclasses[0] = X86_64_INTEGER_CLASS;
6120 for (i = 0; i < words; i++)
6121 classes[i] = subclasses[i % num];
6123 break;
6125 case UNION_TYPE:
6126 case QUAL_UNION_TYPE:
6127 /* Unions are similar to RECORD_TYPE but offset is always 0.
6129 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6131 if (TREE_CODE (field) == FIELD_DECL)
6133 int num;
6135 if (TREE_TYPE (field) == error_mark_node)
6136 continue;
6138 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6139 TREE_TYPE (field), subclasses,
6140 bit_offset);
6141 if (!num)
6142 return 0;
6143 for (i = 0; i < num; i++)
6144 classes[i] = merge_classes (subclasses[i], classes[i]);
6147 break;
6149 default:
6150 gcc_unreachable ();
6153 if (words > 2)
6155 /* When size > 16 bytes, if the first one isn't
6156 X86_64_SSE_CLASS or any other ones aren't
6157 X86_64_SSEUP_CLASS, everything should be passed in
6158 memory. */
6159 if (classes[0] != X86_64_SSE_CLASS)
6160 return 0;
6162 for (i = 1; i < words; i++)
6163 if (classes[i] != X86_64_SSEUP_CLASS)
6164 return 0;
6167 /* Final merger cleanup. */
6168 for (i = 0; i < words; i++)
6170 /* If one class is MEMORY, everything should be passed in
6171 memory. */
6172 if (classes[i] == X86_64_MEMORY_CLASS)
6173 return 0;
6175 /* The X86_64_SSEUP_CLASS should be always preceded by
6176 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6177 if (classes[i] == X86_64_SSEUP_CLASS
6178 && classes[i - 1] != X86_64_SSE_CLASS
6179 && classes[i - 1] != X86_64_SSEUP_CLASS)
6181 /* The first one should never be X86_64_SSEUP_CLASS. */
6182 gcc_assert (i != 0);
6183 classes[i] = X86_64_SSE_CLASS;
6186 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6187 everything should be passed in memory. */
6188 if (classes[i] == X86_64_X87UP_CLASS
6189 && (classes[i - 1] != X86_64_X87_CLASS))
6191 static bool warned;
6193 /* The first one should never be X86_64_X87UP_CLASS. */
6194 gcc_assert (i != 0);
6195 if (!warned && warn_psabi)
6197 warned = true;
6198 inform (input_location,
6199 "the ABI of passing union with long double"
6200 " has changed in GCC 4.4");
6202 return 0;
6205 return words;
6208 /* Compute alignment needed. We align all types to natural boundaries with
6209 exception of XFmode that is aligned to 64bits. */
6210 if (mode != VOIDmode && mode != BLKmode)
6212 int mode_alignment = GET_MODE_BITSIZE (mode);
6214 if (mode == XFmode)
6215 mode_alignment = 128;
6216 else if (mode == XCmode)
6217 mode_alignment = 256;
6218 if (COMPLEX_MODE_P (mode))
6219 mode_alignment /= 2;
6220 /* Misaligned fields are always returned in memory. */
6221 if (bit_offset % mode_alignment)
6222 return 0;
6225 /* For V1xx modes, just use the base mode. */
6226 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6227 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6228 mode = GET_MODE_INNER (mode);
6230 /* Classification of atomic types. */
6231 switch (mode)
6233 case SDmode:
6234 case DDmode:
6235 classes[0] = X86_64_SSE_CLASS;
6236 return 1;
6237 case TDmode:
6238 classes[0] = X86_64_SSE_CLASS;
6239 classes[1] = X86_64_SSEUP_CLASS;
6240 return 2;
6241 case DImode:
6242 case SImode:
6243 case HImode:
6244 case QImode:
6245 case CSImode:
6246 case CHImode:
6247 case CQImode:
6249 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
6251 if (size <= 32)
6253 classes[0] = X86_64_INTEGERSI_CLASS;
6254 return 1;
6256 else if (size <= 64)
6258 classes[0] = X86_64_INTEGER_CLASS;
6259 return 1;
6261 else if (size <= 64+32)
6263 classes[0] = X86_64_INTEGER_CLASS;
6264 classes[1] = X86_64_INTEGERSI_CLASS;
6265 return 2;
6267 else if (size <= 64+64)
6269 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6270 return 2;
6272 else
6273 gcc_unreachable ();
6275 case CDImode:
6276 case TImode:
6277 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6278 return 2;
6279 case COImode:
6280 case OImode:
6281 /* OImode shouldn't be used directly. */
6282 gcc_unreachable ();
6283 case CTImode:
6284 return 0;
6285 case SFmode:
6286 if (!(bit_offset % 64))
6287 classes[0] = X86_64_SSESF_CLASS;
6288 else
6289 classes[0] = X86_64_SSE_CLASS;
6290 return 1;
6291 case DFmode:
6292 classes[0] = X86_64_SSEDF_CLASS;
6293 return 1;
6294 case XFmode:
6295 classes[0] = X86_64_X87_CLASS;
6296 classes[1] = X86_64_X87UP_CLASS;
6297 return 2;
6298 case TFmode:
6299 classes[0] = X86_64_SSE_CLASS;
6300 classes[1] = X86_64_SSEUP_CLASS;
6301 return 2;
6302 case SCmode:
6303 classes[0] = X86_64_SSE_CLASS;
6304 if (!(bit_offset % 64))
6305 return 1;
6306 else
6308 static bool warned;
6310 if (!warned && warn_psabi)
6312 warned = true;
6313 inform (input_location,
6314 "the ABI of passing structure with complex float"
6315 " member has changed in GCC 4.4");
6317 classes[1] = X86_64_SSESF_CLASS;
6318 return 2;
6320 case DCmode:
6321 classes[0] = X86_64_SSEDF_CLASS;
6322 classes[1] = X86_64_SSEDF_CLASS;
6323 return 2;
6324 case XCmode:
6325 classes[0] = X86_64_COMPLEX_X87_CLASS;
6326 return 1;
6327 case TCmode:
6328 /* This mode is larger than 16 bytes. */
6329 return 0;
6330 case V8SFmode:
6331 case V8SImode:
6332 case V32QImode:
6333 case V16HImode:
6334 case V4DFmode:
6335 case V4DImode:
6336 classes[0] = X86_64_SSE_CLASS;
6337 classes[1] = X86_64_SSEUP_CLASS;
6338 classes[2] = X86_64_SSEUP_CLASS;
6339 classes[3] = X86_64_SSEUP_CLASS;
6340 return 4;
6341 case V4SFmode:
6342 case V4SImode:
6343 case V16QImode:
6344 case V8HImode:
6345 case V2DFmode:
6346 case V2DImode:
6347 classes[0] = X86_64_SSE_CLASS;
6348 classes[1] = X86_64_SSEUP_CLASS;
6349 return 2;
6350 case V1TImode:
6351 case V1DImode:
6352 case V2SFmode:
6353 case V2SImode:
6354 case V4HImode:
6355 case V8QImode:
6356 classes[0] = X86_64_SSE_CLASS;
6357 return 1;
6358 case BLKmode:
6359 case VOIDmode:
6360 return 0;
6361 default:
6362 gcc_assert (VECTOR_MODE_P (mode));
6364 if (bytes > 16)
6365 return 0;
6367 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6369 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6370 classes[0] = X86_64_INTEGERSI_CLASS;
6371 else
6372 classes[0] = X86_64_INTEGER_CLASS;
6373 classes[1] = X86_64_INTEGER_CLASS;
6374 return 1 + (bytes > 8);
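/* Illustrative worked example, not part of GCC: classifying
     struct s { double d; long l; };
   on x86-64.  The struct is 16 bytes, i.e. two eightbytes; the first
   (the double) classifies as SSEDF, the second (the long) as INTEGER,
   so an argument of this type is passed in one SSE and one integer
   register, e.g. %xmm0 and %rdi for the first argument.  */
struct s { double d; long l; };
extern void takes_s (struct s);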
6378 /* Examine the argument and return set number of register required in each
6379 class. Return 0 iff parameter should be passed in memory. */
6380 static int
6381 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6382 int *int_nregs, int *sse_nregs)
6384 enum x86_64_reg_class regclass[MAX_CLASSES];
6385 int n = classify_argument (mode, type, regclass, 0);
6387 *int_nregs = 0;
6388 *sse_nregs = 0;
6389 if (!n)
6390 return 0;
6391 for (n--; n >= 0; n--)
6392 switch (regclass[n])
6394 case X86_64_INTEGER_CLASS:
6395 case X86_64_INTEGERSI_CLASS:
6396 (*int_nregs)++;
6397 break;
6398 case X86_64_SSE_CLASS:
6399 case X86_64_SSESF_CLASS:
6400 case X86_64_SSEDF_CLASS:
6401 (*sse_nregs)++;
6402 break;
6403 case X86_64_NO_CLASS:
6404 case X86_64_SSEUP_CLASS:
6405 break;
6406 case X86_64_X87_CLASS:
6407 case X86_64_X87UP_CLASS:
6408 if (!in_return)
6409 return 0;
6410 break;
6411 case X86_64_COMPLEX_X87_CLASS:
6412 return in_return ? 2 : 0;
6413 case X86_64_MEMORY_CLASS:
6414 gcc_unreachable ();
6416 return 1;
6419 /* Construct container for the argument used by GCC interface. See
6420 FUNCTION_ARG for the detailed description. */
6422 static rtx
6423 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6424 const_tree type, int in_return, int nintregs, int nsseregs,
6425 const int *intreg, int sse_regno)
6427 /* The following variables hold the static issued_error state. */
6428 static bool issued_sse_arg_error;
6429 static bool issued_sse_ret_error;
6430 static bool issued_x87_ret_error;
6432 enum machine_mode tmpmode;
6433 int bytes =
6434 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6435 enum x86_64_reg_class regclass[MAX_CLASSES];
6436 int n;
6437 int i;
6438 int nexps = 0;
6439 int needed_sseregs, needed_intregs;
6440 rtx exp[MAX_CLASSES];
6441 rtx ret;
6443 n = classify_argument (mode, type, regclass, 0);
6444 if (!n)
6445 return NULL;
6446 if (!examine_argument (mode, type, in_return, &needed_intregs,
6447 &needed_sseregs))
6448 return NULL;
6449 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6450 return NULL;
6452 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6453 some less clueful developer tries to use floating-point anyway. */
6454 if (needed_sseregs && !TARGET_SSE)
6456 if (in_return)
6458 if (!issued_sse_ret_error)
6460 error ("SSE register return with SSE disabled");
6461 issued_sse_ret_error = true;
6464 else if (!issued_sse_arg_error)
6466 error ("SSE register argument with SSE disabled");
6467 issued_sse_arg_error = true;
6469 return NULL;
6472 /* Likewise, error if the ABI requires us to return values in the
6473 x87 registers and the user specified -mno-80387. */
6474 if (!TARGET_80387 && in_return)
6475 for (i = 0; i < n; i++)
6476 if (regclass[i] == X86_64_X87_CLASS
6477 || regclass[i] == X86_64_X87UP_CLASS
6478 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6480 if (!issued_x87_ret_error)
6482 error ("x87 register return with x87 disabled");
6483 issued_x87_ret_error = true;
6485 return NULL;
6488 /* First construct simple cases. Avoid SCmode, since we want to use
6489 single register to pass this type. */
6490 if (n == 1 && mode != SCmode)
6491 switch (regclass[0])
6493 case X86_64_INTEGER_CLASS:
6494 case X86_64_INTEGERSI_CLASS:
6495 return gen_rtx_REG (mode, intreg[0]);
6496 case X86_64_SSE_CLASS:
6497 case X86_64_SSESF_CLASS:
6498 case X86_64_SSEDF_CLASS:
6499 if (mode != BLKmode)
6500 return gen_reg_or_parallel (mode, orig_mode,
6501 SSE_REGNO (sse_regno));
6502 break;
6503 case X86_64_X87_CLASS:
6504 case X86_64_COMPLEX_X87_CLASS:
6505 return gen_rtx_REG (mode, FIRST_STACK_REG);
6506 case X86_64_NO_CLASS:
6507 /* Zero sized array, struct or class. */
6508 return NULL;
6509 default:
6510 gcc_unreachable ();
6512 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6513 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6514 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6515 if (n == 4
6516 && regclass[0] == X86_64_SSE_CLASS
6517 && regclass[1] == X86_64_SSEUP_CLASS
6518 && regclass[2] == X86_64_SSEUP_CLASS
6519 && regclass[3] == X86_64_SSEUP_CLASS
6520 && mode != BLKmode)
6521 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6523 if (n == 2
6524 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6525 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6526 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6527 && regclass[1] == X86_64_INTEGER_CLASS
6528 && (mode == CDImode || mode == TImode || mode == TFmode)
6529 && intreg[0] + 1 == intreg[1])
6530 return gen_rtx_REG (mode, intreg[0]);
6532 /* Otherwise figure out the entries of the PARALLEL. */
6533 for (i = 0; i < n; i++)
6535 int pos;
6537 switch (regclass[i])
6539 case X86_64_NO_CLASS:
6540 break;
6541 case X86_64_INTEGER_CLASS:
6542 case X86_64_INTEGERSI_CLASS:
6543 /* Merge TImodes on aligned occasions here too. */
6544 if (i * 8 + 8 > bytes)
6545 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6546 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6547 tmpmode = SImode;
6548 else
6549 tmpmode = DImode;
6550 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
6551 if (tmpmode == BLKmode)
6552 tmpmode = DImode;
6553 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6554 gen_rtx_REG (tmpmode, *intreg),
6555 GEN_INT (i*8));
6556 intreg++;
6557 break;
6558 case X86_64_SSESF_CLASS:
6559 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6560 gen_rtx_REG (SFmode,
6561 SSE_REGNO (sse_regno)),
6562 GEN_INT (i*8));
6563 sse_regno++;
6564 break;
6565 case X86_64_SSEDF_CLASS:
6566 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6567 gen_rtx_REG (DFmode,
6568 SSE_REGNO (sse_regno)),
6569 GEN_INT (i*8));
6570 sse_regno++;
6571 break;
6572 case X86_64_SSE_CLASS:
6573 pos = i;
6574 switch (n)
6576 case 1:
6577 tmpmode = DImode;
6578 break;
6579 case 2:
6580 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6582 tmpmode = TImode;
6583 i++;
6585 else
6586 tmpmode = DImode;
6587 break;
6588 case 4:
6589 gcc_assert (i == 0
6590 && regclass[1] == X86_64_SSEUP_CLASS
6591 && regclass[2] == X86_64_SSEUP_CLASS
6592 && regclass[3] == X86_64_SSEUP_CLASS);
6593 tmpmode = OImode;
6594 i += 3;
6595 break;
6596 default:
6597 gcc_unreachable ();
6599 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6600 gen_rtx_REG (tmpmode,
6601 SSE_REGNO (sse_regno)),
6602 GEN_INT (pos*8));
6603 sse_regno++;
6604 break;
6605 default:
6606 gcc_unreachable ();
6610 /* Empty aligned struct, union or class. */
6611 if (nexps == 0)
6612 return NULL;
6614 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6615 for (i = 0; i < nexps; i++)
6616 XVECEXP (ret, 0, i) = exp [i];
6617 return ret;
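/* Illustrative, not part of GCC: for the struct in the sketch after
   classify_argument above, the PARALLEL built here is roughly
     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])
   i.e. each EXPR_LIST pairs a register with the byte offset of the
   eightbyte it carries.  */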
6620 /* Update the data in CUM to advance over an argument of mode MODE
6621 and data type TYPE. (TYPE is null for libcalls where that information
6622 may not be available.) */
6624 static void
6625 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6626 const_tree type, HOST_WIDE_INT bytes,
6627 HOST_WIDE_INT words)
6629 switch (mode)
6631 default:
6632 break;
6634 case BLKmode:
6635 if (bytes < 0)
6636 break;
6637 /* FALLTHRU */
6639 case DImode:
6640 case SImode:
6641 case HImode:
6642 case QImode:
6643 cum->words += words;
6644 cum->nregs -= words;
6645 cum->regno += words;
6647 if (cum->nregs <= 0)
6649 cum->nregs = 0;
6650 cum->regno = 0;
6652 break;
6654 case OImode:
6655 /* OImode shouldn't be used directly. */
6656 gcc_unreachable ();
6658 case DFmode:
6659 if (cum->float_in_sse < 2)
6660 break;
6661 case SFmode:
6662 if (cum->float_in_sse < 1)
6663 break;
6664 /* FALLTHRU */
6666 case V8SFmode:
6667 case V8SImode:
6668 case V32QImode:
6669 case V16HImode:
6670 case V4DFmode:
6671 case V4DImode:
6672 case TImode:
6673 case V16QImode:
6674 case V8HImode:
6675 case V4SImode:
6676 case V2DImode:
6677 case V4SFmode:
6678 case V2DFmode:
6679 if (!type || !AGGREGATE_TYPE_P (type))
6681 cum->sse_words += words;
6682 cum->sse_nregs -= 1;
6683 cum->sse_regno += 1;
6684 if (cum->sse_nregs <= 0)
6686 cum->sse_nregs = 0;
6687 cum->sse_regno = 0;
6690 break;
6692 case V8QImode:
6693 case V4HImode:
6694 case V2SImode:
6695 case V2SFmode:
6696 case V1TImode:
6697 case V1DImode:
6698 if (!type || !AGGREGATE_TYPE_P (type))
6700 cum->mmx_words += words;
6701 cum->mmx_nregs -= 1;
6702 cum->mmx_regno += 1;
6703 if (cum->mmx_nregs <= 0)
6705 cum->mmx_nregs = 0;
6706 cum->mmx_regno = 0;
6709 break;
6713 static void
6714 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6715 const_tree type, HOST_WIDE_INT words, bool named)
6717 int int_nregs, sse_nregs;
6719 /* Unnamed 256bit vector mode parameters are passed on stack. */
6720 if (!named && VALID_AVX256_REG_MODE (mode))
6721 return;
6723 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6724 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6726 cum->nregs -= int_nregs;
6727 cum->sse_nregs -= sse_nregs;
6728 cum->regno += int_nregs;
6729 cum->sse_regno += sse_nregs;
6731 else
6733 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6734 cum->words = (cum->words + align - 1) & ~(align - 1);
6735 cum->words += words;
6739 static void
6740 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6741 HOST_WIDE_INT words)
6743 /* Otherwise, this should be passed indirect. */
6744 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6746 cum->words += words;
6747 if (cum->nregs > 0)
6749 cum->nregs -= 1;
6750 cum->regno += 1;
6754 /* Update the data in CUM to advance over an argument of mode MODE and
6755 data type TYPE. (TYPE is null for libcalls where that information
6756 may not be available.) */
6758 static void
6759 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6760 const_tree type, bool named)
6762 HOST_WIDE_INT bytes, words;
6764 if (mode == BLKmode)
6765 bytes = int_size_in_bytes (type);
6766 else
6767 bytes = GET_MODE_SIZE (mode);
6768 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6770 if (type)
6771 mode = type_natural_mode (type, NULL);
6773 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6774 function_arg_advance_ms_64 (cum, bytes, words);
6775 else if (TARGET_64BIT)
6776 function_arg_advance_64 (cum, mode, type, words, named);
6777 else
6778 function_arg_advance_32 (cum, mode, type, bytes, words);
6781 /* Define where to put the arguments to a function.
6782 Value is zero to push the argument on the stack,
6783 or a hard register in which to store the argument.
6785 MODE is the argument's machine mode.
6786 TYPE is the data type of the argument (as a tree).
6787 This is null for libcalls where that information may
6788 not be available.
6789 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6790 the preceding args and about the function being called.
6791 NAMED is nonzero if this argument is a named parameter
6792 (otherwise it is an extra parameter matching an ellipsis). */
6794 static rtx
6795 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6796 enum machine_mode orig_mode, const_tree type,
6797 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6799 static bool warnedsse, warnedmmx;
6801 /* Avoid the AL settings for the Unix64 ABI. */
6802 if (mode == VOIDmode)
6803 return constm1_rtx;
6805 switch (mode)
6807 default:
6808 break;
6810 case BLKmode:
6811 if (bytes < 0)
6812 break;
6813 /* FALLTHRU */
6814 case DImode:
6815 case SImode:
6816 case HImode:
6817 case QImode:
6818 if (words <= cum->nregs)
6820 int regno = cum->regno;
6822 /* Fastcall allocates the first two DWORD (SImode) or
6823 smaller arguments to ECX and EDX if the argument isn't
6824 an aggregate type. */
6825 if (cum->fastcall)
6827 if (mode == BLKmode
6828 || mode == DImode
6829 || (type && AGGREGATE_TYPE_P (type)))
6830 break;
6832 /* ECX not EAX is the first allocated register. */
6833 if (regno == AX_REG)
6834 regno = CX_REG;
6836 return gen_rtx_REG (mode, regno);
6838 break;
6840 case DFmode:
6841 if (cum->float_in_sse < 2)
6842 break;
6843 case SFmode:
6844 if (cum->float_in_sse < 1)
6845 break;
6846 /* FALLTHRU */
6847 case TImode:
6848 /* In 32bit, we pass TImode in xmm registers. */
6849 case V16QImode:
6850 case V8HImode:
6851 case V4SImode:
6852 case V2DImode:
6853 case V4SFmode:
6854 case V2DFmode:
6855 if (!type || !AGGREGATE_TYPE_P (type))
6857 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6859 warnedsse = true;
6860 warning (0, "SSE vector argument without SSE enabled "
6861 "changes the ABI");
6863 if (cum->sse_nregs)
6864 return gen_reg_or_parallel (mode, orig_mode,
6865 cum->sse_regno + FIRST_SSE_REG);
6867 break;
6869 case OImode:
6870 /* OImode shouldn't be used directly. */
6871 gcc_unreachable ();
6873 case V8SFmode:
6874 case V8SImode:
6875 case V32QImode:
6876 case V16HImode:
6877 case V4DFmode:
6878 case V4DImode:
6879 if (!type || !AGGREGATE_TYPE_P (type))
6881 if (cum->sse_nregs)
6882 return gen_reg_or_parallel (mode, orig_mode,
6883 cum->sse_regno + FIRST_SSE_REG);
6885 break;
6887 case V8QImode:
6888 case V4HImode:
6889 case V2SImode:
6890 case V2SFmode:
6891 case V1TImode:
6892 case V1DImode:
6893 if (!type || !AGGREGATE_TYPE_P (type))
6895 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6897 warnedmmx = true;
6898 warning (0, "MMX vector argument without MMX enabled "
6899 "changes the ABI");
6901 if (cum->mmx_nregs)
6902 return gen_reg_or_parallel (mode, orig_mode,
6903 cum->mmx_regno + FIRST_MMX_REG);
6905 break;
6908 return NULL_RTX;
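/* Illustrative, not part of GCC: how the fastcall path above hands out
   registers on a 32-bit target.  */
int __attribute__ ((fastcall))
fast3 (int a, int b, int c)   /* a in %ecx, b in %edx, c on the stack */
{
  return a + b + c;
}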
6911 static rtx
6912 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6913 enum machine_mode orig_mode, const_tree type, bool named)
6915 /* Handle a hidden AL argument containing number of registers
6916 for varargs x86-64 functions. */
6917 if (mode == VOIDmode)
6918 return GEN_INT (cum->maybe_vaarg
6919 ? (cum->sse_nregs < 0
6920 ? X86_64_SSE_REGPARM_MAX
6921 : cum->sse_regno)
6922 : -1);
6924 switch (mode)
6926 default:
6927 break;
6929 case V8SFmode:
6930 case V8SImode:
6931 case V32QImode:
6932 case V16HImode:
6933 case V4DFmode:
6934 case V4DImode:
6935 /* Unnamed 256bit vector mode parameters are passed on stack. */
6936 if (!named)
6937 return NULL;
6938 break;
6941 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6942 cum->sse_nregs,
6943 &x86_64_int_parameter_registers [cum->regno],
6944 cum->sse_regno);
6947 static rtx
6948 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6949 enum machine_mode orig_mode, bool named,
6950 HOST_WIDE_INT bytes)
6952 unsigned int regno;
6954 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6955 We use a value of -2 to specify that the current function call is MS ABI.
6956 if (mode == VOIDmode)
6957 return GEN_INT (-2);
6959 /* If we've run out of registers, it goes on the stack. */
6960 if (cum->nregs == 0)
6961 return NULL_RTX;
6963 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6965 /* Only floating point modes are passed in anything but integer regs. */
6966 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6968 if (named)
6969 regno = cum->regno + FIRST_SSE_REG;
6970 else
6972 rtx t1, t2;
6974 /* Unnamed floating parameters are passed in both the
6975 SSE and integer registers. */
6976 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6977 t2 = gen_rtx_REG (mode, regno);
6978 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6979 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6980 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6983 /* Handle aggregate types passed in registers. */
6984 if (orig_mode == BLKmode)
6986 if (bytes > 0 && bytes <= 8)
6987 mode = (bytes > 4 ? DImode : SImode);
6988 if (mode == BLKmode)
6989 mode = DImode;
6992 return gen_reg_or_parallel (mode, orig_mode, regno);
6995 /* Return where to put the arguments to a function.
6996 Return zero to push the argument on the stack, or a hard register in
6997 which to store the argument.
6998 MODE is the argument's machine mode. TYPE is the data type of the
6999 argument. It is null for libcalls where that information may not be
7000 available. CUM gives information about the preceding args and about
7001 the function being called. NAMED is nonzero if this argument is a
7002 named parameter (otherwise it is an extra parameter matching an
7003 ellipsis). */
7005 static rtx
7006 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
7007 const_tree type, bool named)
7009 enum machine_mode mode = omode;
7010 HOST_WIDE_INT bytes, words;
7011 rtx arg;
7013 if (mode == BLKmode)
7014 bytes = int_size_in_bytes (type);
7015 else
7016 bytes = GET_MODE_SIZE (mode);
7017 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7019 /* To simplify the code below, represent vector types with a vector mode
7020 even if MMX/SSE are not active. */
7021 if (type && TREE_CODE (type) == VECTOR_TYPE)
7022 mode = type_natural_mode (type, cum);
7024 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7025 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7026 else if (TARGET_64BIT)
7027 arg = function_arg_64 (cum, mode, omode, type, named);
7028 else
7029 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7031 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
7033 /* This argument uses 256bit AVX modes. */
7034 if (cum->caller)
7035 cfun->machine->callee_pass_avx256_p = true;
7036 else
7037 cfun->machine->caller_pass_avx256_p = true;
7040 return arg;
7043 /* A C expression that indicates when an argument must be passed by
7044 reference. If nonzero for an argument, a copy of that argument is
7045 made in memory and a pointer to the argument is passed instead of
7046 the argument itself. The pointer is passed in whatever way is
7047 appropriate for passing a pointer to that type. */
7049 static bool
7050 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
7051 enum machine_mode mode ATTRIBUTE_UNUSED,
7052 const_tree type, bool named ATTRIBUTE_UNUSED)
7054 /* See Windows x64 Software Convention. */
7055 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7057 int msize = (int) GET_MODE_SIZE (mode);
7058 if (type)
7060 /* Arrays are passed by reference. */
7061 if (TREE_CODE (type) == ARRAY_TYPE)
7062 return true;
7064 if (AGGREGATE_TYPE_P (type))
7066 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7067 are passed by reference. */
7068 msize = int_size_in_bytes (type);
7072 /* __m128 is passed by reference. */
7073 switch (msize) {
7074 case 1: case 2: case 4: case 8:
7075 break;
7076 default:
7077 return true;
7080 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7081 return 1;
7083 return 0;
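/* Illustrative, not part of GCC: under the 64-bit MS ABI the checks above
   make any argument whose size is not 1, 2, 4 or 8 bytes travel by hidden
   pointer.  */
struct eight  { int a, b; };        /* 8 bytes: passed by value in a register */
struct twelve { int a, b, c; };     /* 12 bytes: passed by reference */
extern void __attribute__ ((ms_abi)) take8 (struct eight s);
extern void __attribute__ ((ms_abi)) take12 (struct twelve s);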
7086 /* Return true when TYPE should be 128bit aligned for 32bit argument
7087 passing ABI. XXX: This function is obsolete and is only used for
7088 checking psABI compatibility with previous versions of GCC. */
7090 static bool
7091 ix86_compat_aligned_value_p (const_tree type)
7093 enum machine_mode mode = TYPE_MODE (type);
7094 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7095 || mode == TDmode
7096 || mode == TFmode
7097 || mode == TCmode)
7098 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7099 return true;
7100 if (TYPE_ALIGN (type) < 128)
7101 return false;
7103 if (AGGREGATE_TYPE_P (type))
7105 /* Walk the aggregates recursively. */
7106 switch (TREE_CODE (type))
7108 case RECORD_TYPE:
7109 case UNION_TYPE:
7110 case QUAL_UNION_TYPE:
7112 tree field;
7114 /* Walk all the structure fields. */
7115 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7117 if (TREE_CODE (field) == FIELD_DECL
7118 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7119 return true;
7121 break;
7124 case ARRAY_TYPE:
7125 /* Just for use if some language passes arrays by value. */
7126 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7127 return true;
7128 break;
7130 default:
7131 gcc_unreachable ();
7134 return false;
7137 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7138 XXX: This function is obsolete and is only used for checking psABI
7139 compatibility with previous versions of GCC. */
7141 static unsigned int
7142 ix86_compat_function_arg_boundary (enum machine_mode mode,
7143 const_tree type, unsigned int align)
7145 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7146 natural boundaries. */
7147 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7149 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7150 make an exception for SSE modes since these require 128bit
7151 alignment.
7153 The handling here differs from field_alignment. ICC aligns MMX
7154 arguments to 4 byte boundaries, while structure fields are aligned
7155 to 8 byte boundaries. */
7156 if (!type)
7158 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7159 align = PARM_BOUNDARY;
7161 else
7163 if (!ix86_compat_aligned_value_p (type))
7164 align = PARM_BOUNDARY;
7167 if (align > BIGGEST_ALIGNMENT)
7168 align = BIGGEST_ALIGNMENT;
7169 return align;
7172 /* Return true when TYPE should be 128bit aligned for 32bit argument
7173 passing ABI. */
7175 static bool
7176 ix86_contains_aligned_value_p (const_tree type)
7178 enum machine_mode mode = TYPE_MODE (type);
7180 if (mode == XFmode || mode == XCmode)
7181 return false;
7183 if (TYPE_ALIGN (type) < 128)
7184 return false;
7186 if (AGGREGATE_TYPE_P (type))
7188 /* Walk the aggregates recursively. */
7189 switch (TREE_CODE (type))
7191 case RECORD_TYPE:
7192 case UNION_TYPE:
7193 case QUAL_UNION_TYPE:
7195 tree field;
7197 /* Walk all the structure fields. */
7198 for (field = TYPE_FIELDS (type);
7199 field;
7200 field = DECL_CHAIN (field))
7202 if (TREE_CODE (field) == FIELD_DECL
7203 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7204 return true;
7206 break;
7209 case ARRAY_TYPE:
7211 /* Just in case some languages pass arrays by value. */
7211 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7212 return true;
7213 break;
7215 default:
7216 gcc_unreachable ();
7219 else
7220 return TYPE_ALIGN (type) >= 128;
7222 return false;
7225 /* Gives the alignment boundary, in bits, of an argument with the
7226 specified mode and type. */
7228 static unsigned int
7229 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7231 unsigned int align;
7232 if (type)
7234 /* Since the main variant type is used for the call, convert TYPE to
7235 its main variant. */
7236 type = TYPE_MAIN_VARIANT (type);
7237 align = TYPE_ALIGN (type);
7239 else
7240 align = GET_MODE_ALIGNMENT (mode);
7241 if (align < PARM_BOUNDARY)
7242 align = PARM_BOUNDARY;
7243 else
7245 static bool warned;
7246 unsigned int saved_align = align;
7248 if (!TARGET_64BIT)
7250 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7251 if (!type)
7253 if (mode == XFmode || mode == XCmode)
7254 align = PARM_BOUNDARY;
7256 else if (!ix86_contains_aligned_value_p (type))
7257 align = PARM_BOUNDARY;
7259 if (align < 128)
7260 align = PARM_BOUNDARY;
7263 if (warn_psabi
7264 && !warned
7265 && align != ix86_compat_function_arg_boundary (mode, type,
7266 saved_align))
7268 warned = true;
7269 inform (input_location,
7270 "The ABI for passing parameters with %d-byte"
7271 " alignment has changed in GCC 4.6",
7272 align / BITS_PER_UNIT);
7276 return align;
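/* Illustrative sketch (not part of GCC): on the 32-bit ABI the code above
   yields either PARM_BOUNDARY (32 bits) or 128 bits for an argument.
   Only types that contain a 128-bit-aligned value (SSE vectors,
   _Decimal128, __float128, or aggregates containing one) keep their
   16-byte stack alignment; everything else, including double and
   long double, is passed at 4-byte alignment:

     struct dbl { double d; };                       // 32-bit alignment
     typedef float v4sf __attribute__ ((vector_size (16)));
     struct vec { v4sf v; };                         // 128-bit alignment

   When the result differs from what pre-4.6 GCC used, -Wpsabi users get
   the one-time note emitted above.  */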
7279 /* Return true if REGNO is a possible register number for a function value. */
7281 static bool
7282 ix86_function_value_regno_p (const unsigned int regno)
7284 switch (regno)
7286 case 0:
7287 return true;
7289 case FIRST_FLOAT_REG:
7290 /* TODO: The function should depend on current function ABI but
7291 builtins.c would need updating then. Therefore we use the
7292 default ABI. */
7293 if (TARGET_64BIT && ix86_abi == MS_ABI)
7294 return false;
7295 return TARGET_FLOAT_RETURNS_IN_80387;
7297 case FIRST_SSE_REG:
7298 return TARGET_SSE;
7300 case FIRST_MMX_REG:
7301 if (TARGET_MACHO || TARGET_64BIT)
7302 return false;
7303 return TARGET_MMX;
7306 return false;
7309 /* Define how to find the value returned by a function.
7310 VALTYPE is the data type of the value (as a tree).
7311 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7312 otherwise, FUNC is 0. */
7314 static rtx
7315 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7316 const_tree fntype, const_tree fn)
7318 unsigned int regno;
7320 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7321 we normally prevent this case when mmx is not available. However
7322 some ABIs may require the result to be returned like DImode. */
7323 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7324 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7326 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7327 we prevent this case when sse is not available. However some ABIs
7328 may require the result to be returned like integer TImode. */
7329 else if (mode == TImode
7330 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7331 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7333 /* 32-byte vector modes in %ymm0. */
7334 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7335 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7337 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7338 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7339 regno = FIRST_FLOAT_REG;
7340 else
7341 /* Most things go in %eax. */
7342 regno = AX_REG;
7344 /* Override FP return register with %xmm0 for local functions when
7345 SSE math is enabled or for functions with sseregparm attribute. */
7346 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7348 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7349 if ((sse_level >= 1 && mode == SFmode)
7350 || (sse_level == 2 && mode == DFmode))
7351 regno = FIRST_SSE_REG;
7354 /* OImode shouldn't be used directly. */
7355 gcc_assert (mode != OImode);
7357 return gen_rtx_REG (orig_mode, regno);
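/* Illustrative sketch (not part of GCC): for the 32-bit ABI the selection
   above boils down to

     int    f1 (void);   // value in %eax
     double f2 (void);   // value in %st(0), unless -mno-fp-ret-in-387
     __m128 f3 (void);   // value in %xmm0 (requires SSE)
     __m64  f4 (void);   // value in %mm0 (requires MMX)

   and a local function compiled with SSE math, or one marked
   __attribute__ ((sseregparm)), returns its float/double value in %xmm0
   instead of %st(0).  */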
7360 static rtx
7361 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7362 const_tree valtype)
7364 rtx ret;
7366 /* Handle libcalls, which don't provide a type node. */
7367 if (valtype == NULL)
7369 switch (mode)
7371 case SFmode:
7372 case SCmode:
7373 case DFmode:
7374 case DCmode:
7375 case TFmode:
7376 case SDmode:
7377 case DDmode:
7378 case TDmode:
7379 return gen_rtx_REG (mode, FIRST_SSE_REG);
7380 case XFmode:
7381 case XCmode:
7382 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7383 case TCmode:
7384 return NULL;
7385 default:
7386 return gen_rtx_REG (mode, AX_REG);
7390 ret = construct_container (mode, orig_mode, valtype, 1,
7391 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7392 x86_64_int_return_registers, 0);
7394 /* For zero sized structures, construct_container returns NULL, but we
7395 need to keep the rest of the compiler happy by returning a meaningful value. */
7396 if (!ret)
7397 ret = gen_rtx_REG (orig_mode, AX_REG);
7399 return ret;
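/* Illustrative sketch (not part of GCC): for the SysV x86-64 ABI the
   container built above implements the usual psABI classification, e.g.

     long   f1 (void);                     // %rax
     double f2 (void);                     // %xmm0
     struct p { double x, y; } f3 (void);  // %xmm0 / %xmm1
     struct q { long a, b; }   f4 (void);  // %rax / %rdx
     long double f5 (void);                // %st(0)

   while libcalls, which have no type node, take the shortcut switch
   above.  */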
7402 static rtx
7403 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7405 unsigned int regno = AX_REG;
7407 if (TARGET_SSE)
7409 switch (GET_MODE_SIZE (mode))
7411 case 16:
7412 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7413 && !COMPLEX_MODE_P (mode))
7414 regno = FIRST_SSE_REG;
7415 break;
7416 case 8:
7417 case 4:
7418 if (mode == SFmode || mode == DFmode)
7419 regno = FIRST_SSE_REG;
7420 break;
7421 default:
7422 break;
7425 return gen_rtx_REG (orig_mode, regno);
7428 static rtx
7429 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7430 enum machine_mode orig_mode, enum machine_mode mode)
7432 const_tree fn, fntype;
7434 fn = NULL_TREE;
7435 if (fntype_or_decl && DECL_P (fntype_or_decl))
7436 fn = fntype_or_decl;
7437 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7439 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7440 return function_value_ms_64 (orig_mode, mode);
7441 else if (TARGET_64BIT)
7442 return function_value_64 (orig_mode, mode, valtype);
7443 else
7444 return function_value_32 (orig_mode, mode, fntype, fn);
7447 static rtx
7448 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7449 bool outgoing ATTRIBUTE_UNUSED)
7451 enum machine_mode mode, orig_mode;
7453 orig_mode = TYPE_MODE (valtype);
7454 mode = type_natural_mode (valtype, NULL);
7455 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7459 ix86_libcall_value (enum machine_mode mode)
7461 return ix86_function_value_1 (NULL, NULL, mode, mode);
7464 /* Return true iff type is returned in memory. */
7466 static bool ATTRIBUTE_UNUSED
7467 return_in_memory_32 (const_tree type, enum machine_mode mode)
7469 HOST_WIDE_INT size;
7471 if (mode == BLKmode)
7472 return true;
7474 size = int_size_in_bytes (type);
7476 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7477 return false;
7479 if (VECTOR_MODE_P (mode) || mode == TImode)
7481 /* User-created vectors small enough to fit in EAX. */
7482 if (size < 8)
7483 return false;
7485 /* MMX/3dNow values are returned in MM0,
7486 except when MMX doesn't exist or the ABI prescribes otherwise. */
7487 if (size == 8)
7488 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7490 /* SSE values are returned in XMM0, except when it doesn't exist. */
7491 if (size == 16)
7492 return !TARGET_SSE;
7494 /* AVX values are returned in YMM0, except when it doesn't exist. */
7495 if (size == 32)
7496 return !TARGET_AVX;
7499 if (mode == XFmode)
7500 return false;
7502 if (size > 12)
7503 return true;
7505 /* OImode shouldn't be used directly. */
7506 gcc_assert (mode != OImode);
7508 return false;
7511 static bool ATTRIBUTE_UNUSED
7512 return_in_memory_64 (const_tree type, enum machine_mode mode)
7514 int needed_intregs, needed_sseregs;
7515 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7518 static bool ATTRIBUTE_UNUSED
7519 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7521 HOST_WIDE_INT size = int_size_in_bytes (type);
7523 /* __m128 is returned in xmm0. */
7524 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7525 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7526 return false;
7528 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
7529 return size != 1 && size != 2 && size != 4 && size != 8;
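/* Illustrative sketch (not part of GCC): under the MS x64 ABI modeled
   above, a return value stays in a register only if its size is exactly
   1, 2, 4 or 8 bytes, or if it is a 16-byte vector/scalar-integer value
   such as __m128; anything else comes back through a hidden pointer
   supplied by the caller:

     struct a { int i; };           // 4 bytes  -> %rax
     struct b { long long x, y; };  // 16 bytes -> memory (hidden pointer)
     __m128 v;                      // 16 bytes -> %xmm0
*/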
7532 static bool
7533 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7535 #ifdef SUBTARGET_RETURN_IN_MEMORY
7536 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7537 #else
7538 const enum machine_mode mode = type_natural_mode (type, NULL);
7540 if (TARGET_64BIT)
7542 if (ix86_function_type_abi (fntype) == MS_ABI)
7543 return return_in_memory_ms_64 (type, mode);
7544 else
7545 return return_in_memory_64 (type, mode);
7547 else
7548 return return_in_memory_32 (type, mode);
7549 #endif
7552 /* When returning SSE vector types, we have a choice of either
7553 (1) being abi incompatible with a -march switch, or
7554 (2) generating an error.
7555 Given no good solution, I think the safest thing is one warning.
7556 The user won't be able to use -Werror, but....
7558 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7559 called in response to actually generating a caller or callee that
7560 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7561 via aggregate_value_p for general type probing from tree-ssa. */
7563 static rtx
7564 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7566 static bool warnedsse, warnedmmx;
7568 if (!TARGET_64BIT && type)
7570 /* Look at the return type of the function, not the function type. */
7571 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7573 if (!TARGET_SSE && !warnedsse)
7575 if (mode == TImode
7576 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7578 warnedsse = true;
7579 warning (0, "SSE vector return without SSE enabled "
7580 "changes the ABI");
7584 if (!TARGET_MMX && !warnedmmx)
7586 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7588 warnedmmx = true;
7589 warning (0, "MMX vector return without MMX enabled "
7590 "changes the ABI");
7595 return NULL;
7599 /* Create the va_list data type. */
7601 /* Returns the calling convention specific va_list data type.
7602 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7604 static tree
7605 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7607 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7609 /* For i386 we use plain pointer to argument area. */
7610 if (!TARGET_64BIT || abi == MS_ABI)
7611 return build_pointer_type (char_type_node);
7613 record = lang_hooks.types.make_type (RECORD_TYPE);
7614 type_decl = build_decl (BUILTINS_LOCATION,
7615 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7617 f_gpr = build_decl (BUILTINS_LOCATION,
7618 FIELD_DECL, get_identifier ("gp_offset"),
7619 unsigned_type_node);
7620 f_fpr = build_decl (BUILTINS_LOCATION,
7621 FIELD_DECL, get_identifier ("fp_offset"),
7622 unsigned_type_node);
7623 f_ovf = build_decl (BUILTINS_LOCATION,
7624 FIELD_DECL, get_identifier ("overflow_arg_area"),
7625 ptr_type_node);
7626 f_sav = build_decl (BUILTINS_LOCATION,
7627 FIELD_DECL, get_identifier ("reg_save_area"),
7628 ptr_type_node);
7630 va_list_gpr_counter_field = f_gpr;
7631 va_list_fpr_counter_field = f_fpr;
7633 DECL_FIELD_CONTEXT (f_gpr) = record;
7634 DECL_FIELD_CONTEXT (f_fpr) = record;
7635 DECL_FIELD_CONTEXT (f_ovf) = record;
7636 DECL_FIELD_CONTEXT (f_sav) = record;
7638 TYPE_STUB_DECL (record) = type_decl;
7639 TYPE_NAME (record) = type_decl;
7640 TYPE_FIELDS (record) = f_gpr;
7641 DECL_CHAIN (f_gpr) = f_fpr;
7642 DECL_CHAIN (f_fpr) = f_ovf;
7643 DECL_CHAIN (f_ovf) = f_sav;
7645 layout_type (record);
7647 /* The correct type is an array type of one element. */
7648 return build_array_type (record, build_index_type (size_zero_node));
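/* Illustrative sketch (not part of GCC): for the SysV x86-64 ABI the
   record built above corresponds to the familiar psABI va_list:

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;    // offset into reg_save_area for GPR slots
       unsigned int fp_offset;    // offset into reg_save_area for SSE slots
       void *overflow_arg_area;   // next stack-passed argument
       void *reg_save_area;       // start of the register save area
     } __va_list_tag;
     typedef __va_list_tag va_list[1];   // array of one element, as above
*/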
7651 /* Set up the builtin va_list data type and, for 64-bit, the additional
7652 calling convention specific va_list data types. */
7654 static tree
7655 ix86_build_builtin_va_list (void)
7657 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7659 /* Initialize abi specific va_list builtin types. */
7660 if (TARGET_64BIT)
7662 tree t;
7663 if (ix86_abi == MS_ABI)
7665 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7666 if (TREE_CODE (t) != RECORD_TYPE)
7667 t = build_variant_type_copy (t);
7668 sysv_va_list_type_node = t;
7670 else
7672 t = ret;
7673 if (TREE_CODE (t) != RECORD_TYPE)
7674 t = build_variant_type_copy (t);
7675 sysv_va_list_type_node = t;
7677 if (ix86_abi != MS_ABI)
7679 t = ix86_build_builtin_va_list_abi (MS_ABI);
7680 if (TREE_CODE (t) != RECORD_TYPE)
7681 t = build_variant_type_copy (t);
7682 ms_va_list_type_node = t;
7684 else
7686 t = ret;
7687 if (TREE_CODE (t) != RECORD_TYPE)
7688 t = build_variant_type_copy (t);
7689 ms_va_list_type_node = t;
7693 return ret;
7696 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7698 static void
7699 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7701 rtx save_area, mem;
7702 alias_set_type set;
7703 int i, max;
7705 /* GPR size of varargs save area. */
7706 if (cfun->va_list_gpr_size)
7707 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7708 else
7709 ix86_varargs_gpr_size = 0;
7711 /* FPR size of varargs save area. We don't need it if we don't pass
7712 anything in SSE registers. */
7713 if (TARGET_SSE && cfun->va_list_fpr_size)
7714 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7715 else
7716 ix86_varargs_fpr_size = 0;
7718 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7719 return;
7721 save_area = frame_pointer_rtx;
7722 set = get_varargs_alias_set ();
7724 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7725 if (max > X86_64_REGPARM_MAX)
7726 max = X86_64_REGPARM_MAX;
7728 for (i = cum->regno; i < max; i++)
7730 mem = gen_rtx_MEM (Pmode,
7731 plus_constant (save_area, i * UNITS_PER_WORD));
7732 MEM_NOTRAP_P (mem) = 1;
7733 set_mem_alias_set (mem, set);
7734 emit_move_insn (mem, gen_rtx_REG (Pmode,
7735 x86_64_int_parameter_registers[i]));
7738 if (ix86_varargs_fpr_size)
7740 enum machine_mode smode;
7741 rtx label, test;
7743 /* Now emit code to save SSE registers. The AX parameter contains number
7744 of SSE parameter registers used to call this function, though all we
7745 actually check here is the zero/non-zero status. */
7747 label = gen_label_rtx ();
7748 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7749 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7750 label));
7752 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7753 we used movdqa (i.e. TImode) instead? Perhaps even better would
7754 be if we could determine the real mode of the data, via a hook
7755 into pass_stdarg. Ignore all that for now. */
7756 smode = V4SFmode;
7757 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7758 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7760 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7761 if (max > X86_64_SSE_REGPARM_MAX)
7762 max = X86_64_SSE_REGPARM_MAX;
7764 for (i = cum->sse_regno; i < max; ++i)
7766 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7767 mem = gen_rtx_MEM (smode, mem);
7768 MEM_NOTRAP_P (mem) = 1;
7769 set_mem_alias_set (mem, set);
7770 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7772 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7775 emit_label (label);
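/* Illustrative sketch (not part of GCC): the varargs register save area
   laid out above follows the standard psABI layout, 176 bytes in total:

     offset   0 ..  47   %rdi, %rsi, %rdx, %rcx, %r8, %r9   (8 bytes each)
     offset  48 .. 175   %xmm0 .. %xmm7                      (16 bytes each)

   Only the registers not consumed by named parameters are actually
   stored, and the %xmm stores are skipped at run time when %al is zero
   at the call site.  */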
7779 static void
7780 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7782 alias_set_type set = get_varargs_alias_set ();
7783 int i;
7785 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7787 rtx reg, mem;
7789 mem = gen_rtx_MEM (Pmode,
7790 plus_constant (virtual_incoming_args_rtx,
7791 i * UNITS_PER_WORD));
7792 MEM_NOTRAP_P (mem) = 1;
7793 set_mem_alias_set (mem, set);
7795 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7796 emit_move_insn (mem, reg);
7800 static void
7801 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7802 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7803 int no_rtl)
7805 CUMULATIVE_ARGS next_cum;
7806 tree fntype;
7808 /* This argument doesn't appear to be used anymore, which is good,
7809 because the old code here didn't suppress rtl generation. */
7810 gcc_assert (!no_rtl);
7812 if (!TARGET_64BIT)
7813 return;
7815 fntype = TREE_TYPE (current_function_decl);
7817 /* For varargs, we do not want to skip the dummy va_dcl argument.
7818 For stdargs, we do want to skip the last named argument. */
7819 next_cum = *cum;
7820 if (stdarg_p (fntype))
7821 ix86_function_arg_advance (&next_cum, mode, type, true);
7823 if (cum->call_abi == MS_ABI)
7824 setup_incoming_varargs_ms_64 (&next_cum);
7825 else
7826 setup_incoming_varargs_64 (&next_cum);
7829 /* Check whether TYPE is a plain "char *" style va_list. */
7831 static bool
7832 is_va_list_char_pointer (tree type)
7834 tree canonic;
7836 /* For 32-bit it is always true. */
7837 if (!TARGET_64BIT)
7838 return true;
7839 canonic = ix86_canonical_va_list_type (type);
7840 return (canonic == ms_va_list_type_node
7841 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7844 /* Implement va_start. */
7846 static void
7847 ix86_va_start (tree valist, rtx nextarg)
7849 HOST_WIDE_INT words, n_gpr, n_fpr;
7850 tree f_gpr, f_fpr, f_ovf, f_sav;
7851 tree gpr, fpr, ovf, sav, t;
7852 tree type;
7853 rtx ovf_rtx;
7855 if (flag_split_stack
7856 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7858 unsigned int scratch_regno;
7860 /* When we are splitting the stack, we can't refer to the stack
7861 arguments using internal_arg_pointer, because they may be on
7862 the old stack. The split stack prologue will arrange to
7863 leave a pointer to the old stack arguments in a scratch
7864 register, which we here copy to a pseudo-register. The split
7865 stack prologue can't set the pseudo-register directly because
7866 it (the prologue) runs before any registers have been saved. */
7868 scratch_regno = split_stack_prologue_scratch_regno ();
7869 if (scratch_regno != INVALID_REGNUM)
7871 rtx reg, seq;
7873 reg = gen_reg_rtx (Pmode);
7874 cfun->machine->split_stack_varargs_pointer = reg;
7876 start_sequence ();
7877 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7878 seq = get_insns ();
7879 end_sequence ();
7881 push_topmost_sequence ();
7882 emit_insn_after (seq, entry_of_function ());
7883 pop_topmost_sequence ();
7887 /* Only 64bit target needs something special. */
7888 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7890 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7891 std_expand_builtin_va_start (valist, nextarg);
7892 else
7894 rtx va_r, next;
7896 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7897 next = expand_binop (ptr_mode, add_optab,
7898 cfun->machine->split_stack_varargs_pointer,
7899 crtl->args.arg_offset_rtx,
7900 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7901 convert_move (va_r, next, 0);
7903 return;
7906 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7907 f_fpr = DECL_CHAIN (f_gpr);
7908 f_ovf = DECL_CHAIN (f_fpr);
7909 f_sav = DECL_CHAIN (f_ovf);
7911 valist = build_simple_mem_ref (valist);
7912 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7913 /* The following should be folded into the MEM_REF offset. */
7914 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7915 f_gpr, NULL_TREE);
7916 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7917 f_fpr, NULL_TREE);
7918 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7919 f_ovf, NULL_TREE);
7920 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7921 f_sav, NULL_TREE);
7923 /* Count number of gp and fp argument registers used. */
7924 words = crtl->args.info.words;
7925 n_gpr = crtl->args.info.regno;
7926 n_fpr = crtl->args.info.sse_regno;
7928 if (cfun->va_list_gpr_size)
7930 type = TREE_TYPE (gpr);
7931 t = build2 (MODIFY_EXPR, type,
7932 gpr, build_int_cst (type, n_gpr * 8));
7933 TREE_SIDE_EFFECTS (t) = 1;
7934 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7937 if (TARGET_SSE && cfun->va_list_fpr_size)
7939 type = TREE_TYPE (fpr);
7940 t = build2 (MODIFY_EXPR, type, fpr,
7941 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7942 TREE_SIDE_EFFECTS (t) = 1;
7943 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7946 /* Find the overflow area. */
7947 type = TREE_TYPE (ovf);
7948 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7949 ovf_rtx = crtl->args.internal_arg_pointer;
7950 else
7951 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7952 t = make_tree (type, ovf_rtx);
7953 if (words != 0)
7954 t = build2 (POINTER_PLUS_EXPR, type, t,
7955 size_int (words * UNITS_PER_WORD));
7956 t = build2 (MODIFY_EXPR, type, ovf, t);
7957 TREE_SIDE_EFFECTS (t) = 1;
7958 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7960 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7962 /* Find the register save area.
7963 The function prologue saves it right above the stack frame. */
7964 type = TREE_TYPE (sav);
7965 t = make_tree (type, frame_pointer_rtx);
7966 if (!ix86_varargs_gpr_size)
7967 t = build2 (POINTER_PLUS_EXPR, type, t,
7968 size_int (-8 * X86_64_REGPARM_MAX));
7969 t = build2 (MODIFY_EXPR, type, sav, t);
7970 TREE_SIDE_EFFECTS (t) = 1;
7971 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
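/* Illustrative sketch (not part of GCC): for a function such as

     int sum (int count, ...);

   which consumes one named GPR argument and no SSE arguments, the
   expansion above initializes the va_list roughly as

     ap->gp_offset = 1 * 8;            // 8: one named GPR already consumed
     ap->fp_offset = 0 * 16 + 6 * 8;   // 48: start of the SSE save slots
     ap->overflow_arg_area = <first incoming stack argument>;
     ap->reg_save_area = <register save area in the frame>;
*/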
7975 /* Implement va_arg. */
7977 static tree
7978 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7979 gimple_seq *post_p)
7981 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7982 tree f_gpr, f_fpr, f_ovf, f_sav;
7983 tree gpr, fpr, ovf, sav, t;
7984 int size, rsize;
7985 tree lab_false, lab_over = NULL_TREE;
7986 tree addr, t2;
7987 rtx container;
7988 int indirect_p = 0;
7989 tree ptrtype;
7990 enum machine_mode nat_mode;
7991 unsigned int arg_boundary;
7993 /* Only 64bit target needs something special. */
7994 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7995 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7997 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7998 f_fpr = DECL_CHAIN (f_gpr);
7999 f_ovf = DECL_CHAIN (f_fpr);
8000 f_sav = DECL_CHAIN (f_ovf);
8002 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8003 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8004 valist = build_va_arg_indirect_ref (valist);
8005 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8006 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8007 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8009 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8010 if (indirect_p)
8011 type = build_pointer_type (type);
8012 size = int_size_in_bytes (type);
8013 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8015 nat_mode = type_natural_mode (type, NULL);
8016 switch (nat_mode)
8018 case V8SFmode:
8019 case V8SImode:
8020 case V32QImode:
8021 case V16HImode:
8022 case V4DFmode:
8023 case V4DImode:
8024 /* Unnamed 256bit vector mode parameters are passed on stack. */
8025 if (ix86_cfun_abi () == SYSV_ABI)
8027 container = NULL;
8028 break;
8031 default:
8032 container = construct_container (nat_mode, TYPE_MODE (type),
8033 type, 0, X86_64_REGPARM_MAX,
8034 X86_64_SSE_REGPARM_MAX, intreg,
8036 break;
8039 /* Pull the value out of the saved registers. */
8041 addr = create_tmp_var (ptr_type_node, "addr");
8043 if (container)
8045 int needed_intregs, needed_sseregs;
8046 bool need_temp;
8047 tree int_addr, sse_addr;
8049 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8050 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8052 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8054 need_temp = (!REG_P (container)
8055 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8056 || TYPE_ALIGN (type) > 128));
8058 /* In case we are passing a structure, verify that it is a consecutive
8059 block in the register save area. If not, we need to do moves. */
8060 if (!need_temp && !REG_P (container))
8062 /* Verify that all registers are strictly consecutive */
8063 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8065 int i;
8067 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8069 rtx slot = XVECEXP (container, 0, i);
8070 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8071 || INTVAL (XEXP (slot, 1)) != i * 16)
8072 need_temp = 1;
8075 else
8077 int i;
8079 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8081 rtx slot = XVECEXP (container, 0, i);
8082 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8083 || INTVAL (XEXP (slot, 1)) != i * 8)
8084 need_temp = 1;
8088 if (!need_temp)
8090 int_addr = addr;
8091 sse_addr = addr;
8093 else
8095 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8096 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8099 /* First ensure that we fit completely in registers. */
8100 if (needed_intregs)
8102 t = build_int_cst (TREE_TYPE (gpr),
8103 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8104 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8105 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8106 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8107 gimplify_and_add (t, pre_p);
8109 if (needed_sseregs)
8111 t = build_int_cst (TREE_TYPE (fpr),
8112 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8113 + X86_64_REGPARM_MAX * 8);
8114 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8115 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8116 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8117 gimplify_and_add (t, pre_p);
8120 /* Compute index to start of area used for integer regs. */
8121 if (needed_intregs)
8123 /* int_addr = gpr + sav; */
8124 t = fold_convert (sizetype, gpr);
8125 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8126 gimplify_assign (int_addr, t, pre_p);
8128 if (needed_sseregs)
8130 /* sse_addr = fpr + sav; */
8131 t = fold_convert (sizetype, fpr);
8132 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8133 gimplify_assign (sse_addr, t, pre_p);
8135 if (need_temp)
8137 int i, prev_size = 0;
8138 tree temp = create_tmp_var (type, "va_arg_tmp");
8140 /* addr = &temp; */
8141 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8142 gimplify_assign (addr, t, pre_p);
8144 for (i = 0; i < XVECLEN (container, 0); i++)
8146 rtx slot = XVECEXP (container, 0, i);
8147 rtx reg = XEXP (slot, 0);
8148 enum machine_mode mode = GET_MODE (reg);
8149 tree piece_type;
8150 tree addr_type;
8151 tree daddr_type;
8152 tree src_addr, src;
8153 int src_offset;
8154 tree dest_addr, dest;
8155 int cur_size = GET_MODE_SIZE (mode);
8157 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8158 prev_size = INTVAL (XEXP (slot, 1));
8159 if (prev_size + cur_size > size)
8161 cur_size = size - prev_size;
8162 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8163 if (mode == BLKmode)
8164 mode = QImode;
8166 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8167 if (mode == GET_MODE (reg))
8168 addr_type = build_pointer_type (piece_type);
8169 else
8170 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8171 true);
8172 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8173 true);
8175 if (SSE_REGNO_P (REGNO (reg)))
8177 src_addr = sse_addr;
8178 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8180 else
8182 src_addr = int_addr;
8183 src_offset = REGNO (reg) * 8;
8185 src_addr = fold_convert (addr_type, src_addr);
8186 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
8187 size_int (src_offset));
8189 dest_addr = fold_convert (daddr_type, addr);
8190 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
8191 size_int (prev_size));
8192 if (cur_size == GET_MODE_SIZE (mode))
8194 src = build_va_arg_indirect_ref (src_addr);
8195 dest = build_va_arg_indirect_ref (dest_addr);
8197 gimplify_assign (dest, src, pre_p);
8199 else
8201 tree copy
8202 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
8203 3, dest_addr, src_addr,
8204 size_int (cur_size));
8205 gimplify_and_add (copy, pre_p);
8207 prev_size += cur_size;
8211 if (needed_intregs)
8213 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8214 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8215 gimplify_assign (gpr, t, pre_p);
8218 if (needed_sseregs)
8220 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8221 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8222 gimplify_assign (fpr, t, pre_p);
8225 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8227 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8230 /* ... otherwise out of the overflow area. */
8232 /* When the caller aligns a parameter on the stack, any parameter whose
8233 alignment exceeds MAX_SUPPORTED_STACK_ALIGNMENT is aligned at
8234 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
8235 caller. */
8236 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8237 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8238 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8240 /* Care for on-stack alignment if needed. */
8241 if (arg_boundary <= 64 || size == 0)
8242 t = ovf;
8243 else
8245 HOST_WIDE_INT align = arg_boundary / 8;
8246 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
8247 size_int (align - 1));
8248 t = fold_convert (sizetype, t);
8249 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8250 size_int (-align));
8251 t = fold_convert (TREE_TYPE (ovf), t);
8254 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8255 gimplify_assign (addr, t, pre_p);
8257 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
8258 size_int (rsize * UNITS_PER_WORD));
8259 gimplify_assign (unshare_expr (ovf), t, pre_p);
8261 if (container)
8262 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8264 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8265 addr = fold_convert (ptrtype, addr);
8267 if (indirect_p)
8268 addr = build_va_arg_indirect_ref (addr);
8269 return build_va_arg_indirect_ref (addr);
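/* Illustrative sketch (not part of GCC): for a GPR-class type such as
   "int", the gimple emitted above is morally equivalent to

     if (ap->gp_offset >= 6 * 8)          // no integer register slot left
       {
         addr = ap->overflow_arg_area;    // take it from the stack ...
         ap->overflow_arg_area += 8;
       }
     else
       {
         addr = ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8;              // ... or from the save area
       }
     result = *(int *) addr;

   SSE-class types use fp_offset against the 16-byte slots instead, and
   types classified as MEMORY (container == NULL) go straight to the
   overflow area.  */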
8272 /* Return true if OPNUM's MEM should be matched
8273 in movabs* patterns. */
8275 bool
8276 ix86_check_movabs (rtx insn, int opnum)
8278 rtx set, mem;
8280 set = PATTERN (insn);
8281 if (GET_CODE (set) == PARALLEL)
8282 set = XVECEXP (set, 0, 0);
8283 gcc_assert (GET_CODE (set) == SET);
8284 mem = XEXP (set, opnum);
8285 while (GET_CODE (mem) == SUBREG)
8286 mem = SUBREG_REG (mem);
8287 gcc_assert (MEM_P (mem));
8288 return volatile_ok || !MEM_VOLATILE_P (mem);
8291 /* Initialize the table of extra 80387 mathematical constants. */
8293 static void
8294 init_ext_80387_constants (void)
8296 static const char * cst[5] =
8298 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8299 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8300 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8301 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8302 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8304 int i;
8306 for (i = 0; i < 5; i++)
8308 real_from_string (&ext_80387_constants_table[i], cst[i]);
8309 /* Ensure each constant is rounded to XFmode precision. */
8310 real_convert (&ext_80387_constants_table[i],
8311 XFmode, &ext_80387_constants_table[i]);
8314 ext_80387_constants_init = 1;
8317 /* Return non-zero if the constant is something that
8318 can be loaded with a special instruction. */
8321 standard_80387_constant_p (rtx x)
8323 enum machine_mode mode = GET_MODE (x);
8325 REAL_VALUE_TYPE r;
8327 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8328 return -1;
8330 if (x == CONST0_RTX (mode))
8331 return 1;
8332 if (x == CONST1_RTX (mode))
8333 return 2;
8335 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8337 /* For XFmode constants, try to find a special 80387 instruction when
8338 optimizing for size or on those CPUs that benefit from them. */
8339 if (mode == XFmode
8340 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8342 int i;
8344 if (! ext_80387_constants_init)
8345 init_ext_80387_constants ();
8347 for (i = 0; i < 5; i++)
8348 if (real_identical (&r, &ext_80387_constants_table[i]))
8349 return i + 3;
8352 /* Load of the constant -0.0 or -1.0 will be split as
8353 fldz;fchs or fld1;fchs sequence. */
8354 if (real_isnegzero (&r))
8355 return 8;
8356 if (real_identical (&r, &dconstm1))
8357 return 9;
8359 return 0;
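/* Illustrative sketch (not part of GCC): the return codes above map to the
   special x87 load instructions chosen by standard_80387_constant_opcode:

     1 -> fldz    (+0.0)         5 -> fldl2e  (log2 e)
     2 -> fld1    (+1.0)         6 -> fldl2t  (log2 10)
     3 -> fldlg2  (log10 2)      7 -> fldpi   (pi)
     4 -> fldln2  (ln 2)         8, 9 -> split into fldz/fld1 + fchs
*/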
8362 /* Return the opcode of the special instruction to be used to load
8363 the constant X. */
8365 const char *
8366 standard_80387_constant_opcode (rtx x)
8368 switch (standard_80387_constant_p (x))
8370 case 1:
8371 return "fldz";
8372 case 2:
8373 return "fld1";
8374 case 3:
8375 return "fldlg2";
8376 case 4:
8377 return "fldln2";
8378 case 5:
8379 return "fldl2e";
8380 case 6:
8381 return "fldl2t";
8382 case 7:
8383 return "fldpi";
8384 case 8:
8385 case 9:
8386 return "#";
8387 default:
8388 gcc_unreachable ();
8392 /* Return the CONST_DOUBLE representing the 80387 constant that is
8393 loaded by the specified special instruction. The argument IDX
8394 matches the return value from standard_80387_constant_p. */
8397 standard_80387_constant_rtx (int idx)
8399 int i;
8401 if (! ext_80387_constants_init)
8402 init_ext_80387_constants ();
8404 switch (idx)
8406 case 3:
8407 case 4:
8408 case 5:
8409 case 6:
8410 case 7:
8411 i = idx - 3;
8412 break;
8414 default:
8415 gcc_unreachable ();
8418 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8419 XFmode);
8422 /* Return 1 if X is all 0s and 2 if X is all 1s
8423 in a supported SSE vector mode. */
8426 standard_sse_constant_p (rtx x)
8428 enum machine_mode mode = GET_MODE (x);
8430 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8431 return 1;
8432 if (vector_all_ones_operand (x, mode))
8433 switch (mode)
8435 case V16QImode:
8436 case V8HImode:
8437 case V4SImode:
8438 case V2DImode:
8439 if (TARGET_SSE2)
8440 return 2;
8441 default:
8442 break;
8445 return 0;
8448 /* Return the opcode of the special instruction to be used to load
8449 the constant X. */
8451 const char *
8452 standard_sse_constant_opcode (rtx insn, rtx x)
8454 switch (standard_sse_constant_p (x))
8456 case 1:
8457 switch (get_attr_mode (insn))
8459 case MODE_V4SF:
8460 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8461 case MODE_V2DF:
8462 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8463 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8464 else
8465 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
8466 case MODE_TI:
8467 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8468 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8469 else
8470 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
8471 case MODE_V8SF:
8472 return "vxorps\t%x0, %x0, %x0";
8473 case MODE_V4DF:
8474 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8475 return "vxorps\t%x0, %x0, %x0";
8476 else
8477 return "vxorpd\t%x0, %x0, %x0";
8478 case MODE_OI:
8479 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8480 return "vxorps\t%x0, %x0, %x0";
8481 else
8482 return "vpxor\t%x0, %x0, %x0";
8483 default:
8484 break;
8486 case 2:
8487 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
8488 default:
8489 break;
8491 gcc_unreachable ();
8494 /* Returns true if OP contains a symbol reference */
8496 bool
8497 symbolic_reference_mentioned_p (rtx op)
8499 const char *fmt;
8500 int i;
8502 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8503 return true;
8505 fmt = GET_RTX_FORMAT (GET_CODE (op));
8506 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8508 if (fmt[i] == 'E')
8510 int j;
8512 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8513 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8514 return true;
8517 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8518 return true;
8521 return false;
8524 /* Return true if it is appropriate to emit `ret' instructions in the
8525 body of a function. Do this only if the epilogue is simple, needing a
8526 couple of insns. Prior to reloading, we can't tell how many registers
8527 must be saved, so return false then. Return false if there is no frame
8528 marker to de-allocate. */
8530 bool
8531 ix86_can_use_return_insn_p (void)
8533 struct ix86_frame frame;
8535 if (! reload_completed || frame_pointer_needed)
8536 return 0;
8538 /* Don't allow more than 32k pop, since that's all we can do
8539 with one instruction. */
8540 if (crtl->args.pops_args && crtl->args.size >= 32768)
8541 return 0;
8543 ix86_compute_frame_layout (&frame);
8544 return (frame.stack_pointer_offset == UNITS_PER_WORD
8545 && (frame.nregs + frame.nsseregs) == 0);
8548 /* Value should be nonzero if functions must have frame pointers.
8549 Zero means the frame pointer need not be set up (and parms may
8550 be accessed via the stack pointer) in functions that seem suitable. */
8552 static bool
8553 ix86_frame_pointer_required (void)
8555 /* If we accessed previous frames, then the generated code expects
8556 to be able to access the saved ebp value in our frame. */
8557 if (cfun->machine->accesses_prev_frame)
8558 return true;
8560 /* Several x86 os'es need a frame pointer for other reasons,
8561 usually pertaining to setjmp. */
8562 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8563 return true;
8565 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8566 turns off the frame pointer by default. Turn it back on now if
8567 we've not got a leaf function. */
8568 if (TARGET_OMIT_LEAF_FRAME_POINTER
8569 && (!current_function_is_leaf
8570 || ix86_current_function_calls_tls_descriptor))
8571 return true;
8573 if (crtl->profile && !flag_fentry)
8574 return true;
8576 return false;
8579 /* Record that the current function accesses previous call frames. */
8581 void
8582 ix86_setup_frame_addresses (void)
8584 cfun->machine->accesses_prev_frame = 1;
8587 #ifndef USE_HIDDEN_LINKONCE
8588 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
8589 # define USE_HIDDEN_LINKONCE 1
8590 # else
8591 # define USE_HIDDEN_LINKONCE 0
8592 # endif
8593 #endif
8595 static int pic_labels_used;
8597 /* Fills in the label name that should be used for a pc thunk for
8598 the given register. */
8600 static void
8601 get_pc_thunk_name (char name[32], unsigned int regno)
8603 gcc_assert (!TARGET_64BIT);
8605 if (USE_HIDDEN_LINKONCE)
8606 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8607 else
8608 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8612 /* This function generates the pc thunks used by -fpic code: each thunk
8613 loads its register with the return address of the caller and then returns. */
8615 static void
8616 ix86_code_end (void)
8618 rtx xops[2];
8619 int regno;
8621 for (regno = AX_REG; regno <= SP_REG; regno++)
8623 char name[32];
8624 tree decl;
8626 if (!(pic_labels_used & (1 << regno)))
8627 continue;
8629 get_pc_thunk_name (name, regno);
8631 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8632 get_identifier (name),
8633 build_function_type (void_type_node, void_list_node));
8634 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8635 NULL_TREE, void_type_node);
8636 TREE_PUBLIC (decl) = 1;
8637 TREE_STATIC (decl) = 1;
8639 #if TARGET_MACHO
8640 if (TARGET_MACHO)
8642 switch_to_section (darwin_sections[text_coal_section]);
8643 fputs ("\t.weak_definition\t", asm_out_file);
8644 assemble_name (asm_out_file, name);
8645 fputs ("\n\t.private_extern\t", asm_out_file);
8646 assemble_name (asm_out_file, name);
8647 putc ('\n', asm_out_file);
8648 ASM_OUTPUT_LABEL (asm_out_file, name);
8649 DECL_WEAK (decl) = 1;
8651 else
8652 #endif
8653 if (USE_HIDDEN_LINKONCE)
8655 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8657 targetm.asm_out.unique_section (decl, 0);
8658 switch_to_section (get_named_section (decl, NULL, 0));
8660 targetm.asm_out.globalize_label (asm_out_file, name);
8661 fputs ("\t.hidden\t", asm_out_file);
8662 assemble_name (asm_out_file, name);
8663 putc ('\n', asm_out_file);
8664 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8666 else
8668 switch_to_section (text_section);
8669 ASM_OUTPUT_LABEL (asm_out_file, name);
8672 DECL_INITIAL (decl) = make_node (BLOCK);
8673 current_function_decl = decl;
8674 init_function_start (decl);
8675 first_function_block_is_cold = false;
8676 /* Make sure unwind info is emitted for the thunk if needed. */
8677 final_start_function (emit_barrier (), asm_out_file, 1);
8679 /* Pad stack IP move with 4 instructions (two NOPs count
8680 as one instruction). */
8681 if (TARGET_PAD_SHORT_FUNCTION)
8683 int i = 8;
8685 while (i--)
8686 fputs ("\tnop\n", asm_out_file);
8689 xops[0] = gen_rtx_REG (Pmode, regno);
8690 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8691 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8692 fputs ("\tret\n", asm_out_file);
8693 final_end_function ();
8694 init_insn_lengths ();
8695 free_after_compilation (cfun);
8696 set_cfun (NULL);
8697 current_function_decl = NULL;
8700 if (flag_split_stack)
8701 file_end_indicate_split_stack ();
8704 /* Emit code for the SET_GOT patterns. */
8706 const char *
8707 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8709 rtx xops[3];
8711 xops[0] = dest;
8713 if (TARGET_VXWORKS_RTP && flag_pic)
8715 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8716 xops[2] = gen_rtx_MEM (Pmode,
8717 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8718 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8720 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8721 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8722 an unadorned address. */
8723 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8724 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8725 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8726 return "";
8729 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8731 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8733 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8735 if (!flag_pic)
8736 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8737 else
8739 output_asm_insn ("call\t%a2", xops);
8740 #ifdef DWARF2_UNWIND_INFO
8741 /* The call to next label acts as a push. */
8742 if (dwarf2out_do_frame ())
8744 rtx insn;
8745 start_sequence ();
8746 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8747 gen_rtx_PLUS (Pmode,
8748 stack_pointer_rtx,
8749 GEN_INT (-4))));
8750 RTX_FRAME_RELATED_P (insn) = 1;
8751 dwarf2out_frame_debug (insn, true);
8752 end_sequence ();
8754 #endif
8757 #if TARGET_MACHO
8758 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8759 is what will be referenced by the Mach-O PIC subsystem. */
8760 if (!label)
8761 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8762 #endif
8764 targetm.asm_out.internal_label (asm_out_file, "L",
8765 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8767 if (flag_pic)
8769 output_asm_insn ("pop%z0\t%0", xops);
8770 #ifdef DWARF2_UNWIND_INFO
8771 /* The pop is a pop and clobbers dest, but doesn't restore it
8772 for unwind info purposes. */
8773 if (dwarf2out_do_frame ())
8775 rtx insn;
8776 start_sequence ();
8777 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8778 dwarf2out_frame_debug (insn, true);
8779 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8780 gen_rtx_PLUS (Pmode,
8781 stack_pointer_rtx,
8782 GEN_INT (4))));
8783 RTX_FRAME_RELATED_P (insn) = 1;
8784 dwarf2out_frame_debug (insn, true);
8785 end_sequence ();
8787 #endif
8790 else
8792 char name[32];
8793 get_pc_thunk_name (name, REGNO (dest));
8794 pic_labels_used |= 1 << REGNO (dest);
8796 #ifdef DWARF2_UNWIND_INFO
8797 /* Ensure all queued register saves are flushed before the
8798 call. */
8799 if (dwarf2out_do_frame ())
8800 dwarf2out_flush_queued_reg_saves ();
8801 #endif
8802 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8803 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8804 output_asm_insn ("call\t%X2", xops);
8805 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8806 is what will be referenced by the Mach-O PIC subsystem. */
8807 #if TARGET_MACHO
8808 if (!label)
8809 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8810 else
8811 targetm.asm_out.internal_label (asm_out_file, "L",
8812 CODE_LABEL_NUMBER (label));
8813 #endif
8816 if (TARGET_MACHO)
8817 return "";
8819 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8820 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8821 else
8822 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
8824 return "";
8827 /* Generate an "push" pattern for input ARG. */
8829 static rtx
8830 gen_push (rtx arg)
8832 struct machine_function *m = cfun->machine;
8834 if (m->fs.cfa_reg == stack_pointer_rtx)
8835 m->fs.cfa_offset += UNITS_PER_WORD;
8836 m->fs.sp_offset += UNITS_PER_WORD;
8838 return gen_rtx_SET (VOIDmode,
8839 gen_rtx_MEM (Pmode,
8840 gen_rtx_PRE_DEC (Pmode,
8841 stack_pointer_rtx)),
8842 arg);
8845 /* Generate an "pop" pattern for input ARG. */
8847 static rtx
8848 gen_pop (rtx arg)
8850 return gen_rtx_SET (VOIDmode,
8851 arg,
8852 gen_rtx_MEM (Pmode,
8853 gen_rtx_POST_INC (Pmode,
8854 stack_pointer_rtx)));
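/* Illustrative sketch (not part of GCC): on a 64-bit target, gen_push and
   gen_pop produce the canonical push/pop RTL patterns, e.g. for %rbx:

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI bx))    // push
     (set (reg:DI bx) (mem:DI (post_inc:DI (reg:DI sp))))   // pop

   gen_push additionally keeps cfun->machine->fs.sp_offset (and, while the
   CFA is still the stack pointer, cfa_offset) in sync by adding
   UNITS_PER_WORD.  */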
8857 /* Return >= 0 if there is an unused call-clobbered register available
8858 for the entire function. */
8860 static unsigned int
8861 ix86_select_alt_pic_regnum (void)
8863 if (current_function_is_leaf
8864 && !crtl->profile
8865 && !ix86_current_function_calls_tls_descriptor)
8867 int i, drap;
8868 /* Can't use the same register for both PIC and DRAP. */
8869 if (crtl->drap_reg)
8870 drap = REGNO (crtl->drap_reg);
8871 else
8872 drap = -1;
8873 for (i = 2; i >= 0; --i)
8874 if (i != drap && !df_regs_ever_live_p (i))
8875 return i;
8878 return INVALID_REGNUM;
8881 /* Return 1 if we need to save REGNO. */
8882 static int
8883 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8885 if (pic_offset_table_rtx
8886 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8887 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8888 || crtl->profile
8889 || crtl->calls_eh_return
8890 || crtl->uses_const_pool))
8892 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8893 return 0;
8894 return 1;
8897 if (crtl->calls_eh_return && maybe_eh_return)
8899 unsigned i;
8900 for (i = 0; ; i++)
8902 unsigned test = EH_RETURN_DATA_REGNO (i);
8903 if (test == INVALID_REGNUM)
8904 break;
8905 if (test == regno)
8906 return 1;
8910 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8911 return 1;
8913 return (df_regs_ever_live_p (regno)
8914 && !call_used_regs[regno]
8915 && !fixed_regs[regno]
8916 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8919 /* Return the number of saved general purpose registers. */
8921 static int
8922 ix86_nsaved_regs (void)
8924 int nregs = 0;
8925 int regno;
8927 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8928 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8929 nregs ++;
8930 return nregs;
8933 /* Return the number of saved SSE registers. */
8935 static int
8936 ix86_nsaved_sseregs (void)
8938 int nregs = 0;
8939 int regno;
8941 if (ix86_cfun_abi () != MS_ABI)
8942 return 0;
8943 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8944 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8945 nregs ++;
8946 return nregs;
8949 /* Given FROM and TO register numbers, say whether this elimination is
8950 allowed. If stack alignment is needed, we can only replace argument
8951 pointer with hard frame pointer, or replace frame pointer with stack
8952 pointer. Otherwise, frame pointer elimination is automatically
8953 handled and all other eliminations are valid. */
8955 static bool
8956 ix86_can_eliminate (const int from, const int to)
8958 if (stack_realign_fp)
8959 return ((from == ARG_POINTER_REGNUM
8960 && to == HARD_FRAME_POINTER_REGNUM)
8961 || (from == FRAME_POINTER_REGNUM
8962 && to == STACK_POINTER_REGNUM));
8963 else
8964 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8967 /* Return the offset between two registers, one to be eliminated, and the other
8968 its replacement, at the start of a routine. */
8970 HOST_WIDE_INT
8971 ix86_initial_elimination_offset (int from, int to)
8973 struct ix86_frame frame;
8974 ix86_compute_frame_layout (&frame);
8976 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8977 return frame.hard_frame_pointer_offset;
8978 else if (from == FRAME_POINTER_REGNUM
8979 && to == HARD_FRAME_POINTER_REGNUM)
8980 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8981 else
8983 gcc_assert (to == STACK_POINTER_REGNUM);
8985 if (from == ARG_POINTER_REGNUM)
8986 return frame.stack_pointer_offset;
8988 gcc_assert (from == FRAME_POINTER_REGNUM);
8989 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8993 /* In a dynamically-aligned function, we can't know the offset from
8994 stack pointer to frame pointer, so we must ensure that setjmp
8995 eliminates fp against the hard fp (%ebp) rather than trying to
8996 index from %esp up to the top of the frame across a gap that is
8997 of unknown (at compile-time) size. */
8998 static rtx
8999 ix86_builtin_setjmp_frame_value (void)
9001 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9004 /* On the x86 -fsplit-stack and -fstack-protector both use the same
9005 field in the TCB, so they can not be used together. */
9007 static bool
9008 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
9009 struct gcc_options *opts ATTRIBUTE_UNUSED)
9011 bool ret = true;
9013 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
9014 if (report)
9015 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
9016 ret = false;
9017 #else
9018 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
9020 if (report)
9021 error ("%<-fsplit-stack%> requires "
9022 "assembler support for CFI directives");
9023 ret = false;
9025 #endif
9027 return ret;
9030 /* When using -fsplit-stack, the allocation routines set a field in
9031 the TCB to the bottom of the stack plus this much space, measured
9032 in bytes. */
9034 #define SPLIT_STACK_AVAILABLE 256
9036 /* Fill structure ix86_frame about frame of currently computed function. */
9038 static void
9039 ix86_compute_frame_layout (struct ix86_frame *frame)
9041 unsigned int stack_alignment_needed;
9042 HOST_WIDE_INT offset;
9043 unsigned int preferred_alignment;
9044 HOST_WIDE_INT size = get_frame_size ();
9045 HOST_WIDE_INT to_allocate;
9047 frame->nregs = ix86_nsaved_regs ();
9048 frame->nsseregs = ix86_nsaved_sseregs ();
9050 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9051 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9053 /* The MS ABI seems to require stack alignment to always be 16, except in
9054 function prologues and leaf functions. */
9055 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
9056 && (!current_function_is_leaf || cfun->calls_alloca != 0
9057 || ix86_current_function_calls_tls_descriptor))
9059 preferred_alignment = 16;
9060 stack_alignment_needed = 16;
9061 crtl->preferred_stack_boundary = 128;
9062 crtl->stack_alignment_needed = 128;
9065 gcc_assert (!size || stack_alignment_needed);
9066 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9067 gcc_assert (preferred_alignment <= stack_alignment_needed);
9069 /* For SEH we have to limit the amount of code movement into the prologue.
9070 At present we do this via a BLOCKAGE, at which point there's very little
9071 scheduling that can be done, which means that there's very little point
9072 in doing anything except PUSHs. */
9073 if (TARGET_SEH)
9074 cfun->machine->use_fast_prologue_epilogue = false;
9076 /* During a reload iteration the number of saved registers can change.
9077 Recompute the value as needed. Do not recompute when the number of
9078 registers didn't change, as reload makes multiple calls to this function
9079 and does not expect the decision to change within a single iteration. */
9080 else if (!optimize_function_for_size_p (cfun)
9081 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9083 int count = frame->nregs;
9084 struct cgraph_node *node = cgraph_node (current_function_decl);
9086 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9088 /* The fast prologue uses move instead of push to save registers. This
9089 is significantly longer, but also executes faster as modern hardware
9090 can execute the moves in parallel, but can't do that for push/pop.
9092 Be careful about choosing which prologue to emit: when the function takes
9093 many instructions to execute, we may as well use the slow version, and
9094 likewise when the function is known to be outside a hot spot (this is
9095 known with feedback only). Weight the size of the function by the number
9096 of registers to save, as it is cheap to use one or two push instructions
9097 but very slow to use many of them. */
9098 if (count)
9099 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9100 if (node->frequency < NODE_FREQUENCY_NORMAL
9101 || (flag_branch_probabilities
9102 && node->frequency < NODE_FREQUENCY_HOT))
9103 cfun->machine->use_fast_prologue_epilogue = false;
9104 else
9105 cfun->machine->use_fast_prologue_epilogue
9106 = !expensive_function_p (count);
9108 if (TARGET_PROLOGUE_USING_MOVE
9109 && cfun->machine->use_fast_prologue_epilogue)
9110 frame->save_regs_using_mov = true;
9111 else
9112 frame->save_regs_using_mov = false;
9114 /* If static stack checking is enabled and done with probes, the registers
9115 need to be saved before allocating the frame. */
9116 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9117 frame->save_regs_using_mov = false;
9119 /* Skip return address. */
9120 offset = UNITS_PER_WORD;
9122 /* Skip pushed static chain. */
9123 if (ix86_static_chain_on_stack)
9124 offset += UNITS_PER_WORD;
9126 /* Skip saved base pointer. */
9127 if (frame_pointer_needed)
9128 offset += UNITS_PER_WORD;
9129 frame->hfp_save_offset = offset;
9131 /* The traditional frame pointer location is at the top of the frame. */
9132 frame->hard_frame_pointer_offset = offset;
9134 /* Register save area */
9135 offset += frame->nregs * UNITS_PER_WORD;
9136 frame->reg_save_offset = offset;
9138 /* Align and set SSE register save area. */
9139 if (frame->nsseregs)
9141 /* The only ABI that has saved SSE registers (Win64) also has a
9142 16-byte aligned default stack, and thus we don't need to be
9143 within the re-aligned local stack frame to save them. */
9144 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9145 offset = (offset + 16 - 1) & -16;
9146 offset += frame->nsseregs * 16;
9148 frame->sse_reg_save_offset = offset;
9150 /* The re-aligned stack starts here. Values before this point are not
9151 directly comparable with values below this point. In order to make
9152 sure that no value happens to be the same before and after, force
9153 the alignment computation below to add a non-zero value. */
9154 if (stack_realign_fp)
9155 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9157 /* Va-arg area */
9158 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9159 offset += frame->va_arg_size;
9161 /* Align start of frame for local function. */
9162 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9164 /* Frame pointer points here. */
9165 frame->frame_pointer_offset = offset;
9167 offset += size;
9169 /* Add the outgoing arguments area. This can be skipped if we eliminated
9170 all the function calls as dead code.
9171 Skipping is, however, impossible when the function calls alloca: the
9172 alloca expander assumes that the last crtl->outgoing_args_size bytes
9173 of the stack frame are unused. */
9174 if (ACCUMULATE_OUTGOING_ARGS
9175 && (!current_function_is_leaf || cfun->calls_alloca
9176 || ix86_current_function_calls_tls_descriptor))
9178 offset += crtl->outgoing_args_size;
9179 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9181 else
9182 frame->outgoing_arguments_size = 0;
9184 /* Align stack boundary. Only needed if we're calling another function
9185 or using alloca. */
9186 if (!current_function_is_leaf || cfun->calls_alloca
9187 || ix86_current_function_calls_tls_descriptor)
9188 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9190 /* We've reached end of stack frame. */
9191 frame->stack_pointer_offset = offset;
9193 /* Size prologue needs to allocate. */
9194 to_allocate = offset - frame->sse_reg_save_offset;
9196 if ((!to_allocate && frame->nregs <= 1)
9197 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9198 frame->save_regs_using_mov = false;
9200 if (ix86_using_red_zone ()
9201 && current_function_sp_is_unchanging
9202 && current_function_is_leaf
9203 && !ix86_current_function_calls_tls_descriptor)
9205 frame->red_zone_size = to_allocate;
9206 if (frame->save_regs_using_mov)
9207 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9208 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9209 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9211 else
9212 frame->red_zone_size = 0;
9213 frame->stack_pointer_offset -= frame->red_zone_size;
9215 /* The SEH frame pointer location is near the bottom of the frame.
9216 This is enforced by the fact that the difference between the
9217 stack pointer and the frame pointer is limited to 240 bytes in
9218 the unwind data structure. */
9219 if (TARGET_SEH)
9221 HOST_WIDE_INT diff;
9223 /* If we can leave the frame pointer where it is, do so. */
9224 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9225 if (diff > 240 || (diff & 15) != 0)
9227 /* Ideally we'd determine what portion of the local stack frame
9228 (within the constraint of the lowest 240 bytes) is most heavily used.
9229 But without that complication, simply bias the frame pointer
9230 by 128 bytes so as to maximize the amount of the local stack
9231 frame that is addressable with 8-bit offsets. */
9232 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
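/* Illustrative sketch, not part of i386.c: the frame layout code above
   repeatedly rounds an offset up to an alignment boundary with the idiom
   (offset + align - 1) & -align, which assumes ALIGN is a power of two.
   A standalone example with made-up values (compile separately).  */

#include <assert.h>
#include <stdio.h>

static long
round_up_to_alignment (long offset, long align)
{
  /* ALIGN must be a power of two: adding ALIGN - 1 carries OFFSET past
     the next boundary unless it is already aligned, and ANDing with
     -ALIGN (all ones above the low bits) truncates back down.  */
  return (offset + align - 1) & -align;
}

int
main (void)
{
  assert (round_up_to_alignment (52, 16) == 64);  /* 52 rounds up to 64 */
  assert (round_up_to_alignment (64, 16) == 64);  /* aligned values are kept */
  assert (round_up_to_alignment (1, 32) == 32);
  printf ("%ld\n", round_up_to_alignment (52, 16));
  return 0;
}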
9237 /* This is semi-inlined memory_address_length, but simplified
9238 since we know that we're always dealing with reg+offset, and
9239 to avoid having to create and discard all that rtl. */
9241 static inline int
9242 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9244 int len = 4;
9246 if (offset == 0)
9248 /* EBP and R13 cannot be encoded without an offset. */
9249 len = (regno == BP_REG || regno == R13_REG);
9251 else if (IN_RANGE (offset, -128, 127))
9252 len = 1;
9254 /* ESP and R12 must be encoded with a SIB byte. */
9255 if (regno == SP_REG || regno == R12_REG)
9256 len++;
9258 return len;
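/* Illustrative sketch, not part of i386.c: the byte counts returned by
   choose_baseaddr_len follow the x86 ModRM/SIB/displacement rules: a zero
   offset usually needs no displacement byte, but EBP/R13-based addresses
   always need at least a disp8; offsets in [-128, 127] need one byte,
   anything else four; and ESP/R12-based addresses always need an extra
   SIB byte.  A standalone restatement with a few worked values; the
   register names below are stand-ins, not the real regnos (compile
   separately).  */

#include <assert.h>

enum example_reg { XBP, XSP, XR12, XR13, XOTHER };

static int
example_disp_bytes (enum example_reg reg, long offset)
{
  int len;

  if (offset == 0)
    len = (reg == XBP || reg == XR13);   /* forced disp8 of zero */
  else if (offset >= -128 && offset <= 127)
    len = 1;                             /* disp8 */
  else
    len = 4;                             /* disp32 */

  if (reg == XSP || reg == XR12)
    len++;                               /* SIB byte required */
  return len;
}

int
main (void)
{
  assert (example_disp_bytes (XOTHER, 0) == 0);   /* e.g. (%eax) */
  assert (example_disp_bytes (XBP, 0) == 1);      /* 0(%ebp) needs a disp8 */
  assert (example_disp_bytes (XSP, 0) == 1);      /* (%esp) needs a SIB byte */
  assert (example_disp_bytes (XSP, -8) == 2);     /* -8(%esp): disp8 + SIB */
  assert (example_disp_bytes (XOTHER, 512) == 4); /* disp32 */
  return 0;
}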
9261 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9262 The valid base registers are taken from CFUN->MACHINE->FS. */
9264 static rtx
9265 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9267 const struct machine_function *m = cfun->machine;
9268 rtx base_reg = NULL;
9269 HOST_WIDE_INT base_offset = 0;
9271 if (m->use_fast_prologue_epilogue)
9273 /* Choose the base register most likely to allow the most scheduling
9274 opportunities. Generally FP is valid throughout the function,
9275 while DRAP must be reloaded within the epilogue. But choose either
9276 over the SP due to increased encoding size. */
9278 if (m->fs.fp_valid)
9280 base_reg = hard_frame_pointer_rtx;
9281 base_offset = m->fs.fp_offset - cfa_offset;
9283 else if (m->fs.drap_valid)
9285 base_reg = crtl->drap_reg;
9286 base_offset = 0 - cfa_offset;
9288 else if (m->fs.sp_valid)
9290 base_reg = stack_pointer_rtx;
9291 base_offset = m->fs.sp_offset - cfa_offset;
9294 else
9296 HOST_WIDE_INT toffset;
9297 int len = 16, tlen;
9299 /* Choose the base register with the smallest address encoding.
9300 With a tie, choose FP > DRAP > SP. */
9301 if (m->fs.sp_valid)
9303 base_reg = stack_pointer_rtx;
9304 base_offset = m->fs.sp_offset - cfa_offset;
9305 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9307 if (m->fs.drap_valid)
9309 toffset = 0 - cfa_offset;
9310 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9311 if (tlen <= len)
9313 base_reg = crtl->drap_reg;
9314 base_offset = toffset;
9315 len = tlen;
9318 if (m->fs.fp_valid)
9320 toffset = m->fs.fp_offset - cfa_offset;
9321 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9322 if (tlen <= len)
9324 base_reg = hard_frame_pointer_rtx;
9325 base_offset = toffset;
9326 len = tlen;
9330 gcc_assert (base_reg != NULL);
9332 return plus_constant (base_reg, base_offset);
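/* Illustrative sketch, not part of i386.c: in the size-optimizing branch
   of choose_baseaddr the candidates are examined in the order SP, DRAP,
   FP, and each later candidate replaces the current choice when its
   encoding is no longer ("tlen <= len").  That "<=" is what implements
   the documented tie-break FP > DRAP > SP.  A minimal standalone
   demonstration with made-up candidate names and lengths (compile
   separately).  */

#include <assert.h>
#include <string.h>

int
main (void)
{
  const char *names[] = { "sp", "drap", "fp" };  /* examined in this order */
  int lens[] = { 2, 2, 2 };                      /* a three-way tie */
  const char *choice = NULL;
  int best = 16;                                 /* longer than any real encoding */
  int i;

  for (i = 0; i < 3; i++)
    if (lens[i] <= best)        /* "<=" lets later candidates win ties */
      {
        choice = names[i];
        best = lens[i];
      }

  assert (strcmp (choice, "fp") == 0);   /* FP wins the tie, as documented */
  return 0;
}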
9335 /* Emit code to save registers in the prologue. */
9337 static void
9338 ix86_emit_save_regs (void)
9340 unsigned int regno;
9341 rtx insn;
9343 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9344 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9346 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9347 RTX_FRAME_RELATED_P (insn) = 1;
9351 /* Emit a single register save at CFA - CFA_OFFSET. */
9353 static void
9354 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9355 HOST_WIDE_INT cfa_offset)
9357 struct machine_function *m = cfun->machine;
9358 rtx reg = gen_rtx_REG (mode, regno);
9359 rtx mem, addr, base, insn;
9361 addr = choose_baseaddr (cfa_offset);
9362 mem = gen_frame_mem (mode, addr);
9364 /* For SSE saves, we need to indicate the 128-bit alignment. */
9365 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9367 insn = emit_move_insn (mem, reg);
9368 RTX_FRAME_RELATED_P (insn) = 1;
9370 base = addr;
9371 if (GET_CODE (base) == PLUS)
9372 base = XEXP (base, 0);
9373 gcc_checking_assert (REG_P (base));
9375 /* When saving registers into a re-aligned local stack frame, avoid
9376 any tricky guessing by dwarf2out. */
9377 if (m->fs.realigned)
9379 gcc_checking_assert (stack_realign_drap);
9381 if (regno == REGNO (crtl->drap_reg))
9383 /* A bit of a hack. We force the DRAP register to be saved in
9384 the re-aligned stack frame, which provides us with a copy
9385 of the CFA that will last past the prologue. Install it. */
9386 gcc_checking_assert (cfun->machine->fs.fp_valid);
9387 addr = plus_constant (hard_frame_pointer_rtx,
9388 cfun->machine->fs.fp_offset - cfa_offset);
9389 mem = gen_rtx_MEM (mode, addr);
9390 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9392 else
9394 /* The frame pointer is a stable reference within the
9395 aligned frame. Use it. */
9396 gcc_checking_assert (cfun->machine->fs.fp_valid);
9397 addr = plus_constant (hard_frame_pointer_rtx,
9398 cfun->machine->fs.fp_offset - cfa_offset);
9399 mem = gen_rtx_MEM (mode, addr);
9400 add_reg_note (insn, REG_CFA_EXPRESSION,
9401 gen_rtx_SET (VOIDmode, mem, reg));
9405 /* The memory may not be relative to the current CFA register,
9406 which means that we may need to generate a new pattern for
9407 use by the unwind info. */
9408 else if (base != m->fs.cfa_reg)
9410 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9411 mem = gen_rtx_MEM (mode, addr);
9412 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9416 /* Emit code to save registers using MOV insns.
9417 First register is stored at CFA - CFA_OFFSET. */
9418 static void
9419 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9421 unsigned int regno;
9423 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9424 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9426 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9427 cfa_offset -= UNITS_PER_WORD;
9431 /* Emit code to save SSE registers using MOV insns.
9432 First register is stored at CFA - CFA_OFFSET. */
9433 static void
9434 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9436 unsigned int regno;
9438 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9439 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9441 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9442 cfa_offset -= 16;
9446 static GTY(()) rtx queued_cfa_restores;
9448 /* Add a REG_CFA_RESTORE REG note to INSN or queue it until the next stack
9449 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9450 Don't add the note if the previously saved value will be left untouched
9451 within the stack red zone till return, as unwinders can find the same value
9452 in the register and on the stack. */
9454 static void
9455 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9457 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9458 return;
9460 if (insn)
9462 add_reg_note (insn, REG_CFA_RESTORE, reg);
9463 RTX_FRAME_RELATED_P (insn) = 1;
9465 else
9466 queued_cfa_restores
9467 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9470 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9472 static void
9473 ix86_add_queued_cfa_restore_notes (rtx insn)
9475 rtx last;
9476 if (!queued_cfa_restores)
9477 return;
9478 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9480 XEXP (last, 1) = REG_NOTES (insn);
9481 REG_NOTES (insn) = queued_cfa_restores;
9482 queued_cfa_restores = NULL_RTX;
9483 RTX_FRAME_RELATED_P (insn) = 1;
9486 /* Expand prologue or epilogue stack adjustment.
9487 The pattern exists to put a dependency on all ebp-based memory accesses.
9488 STYLE should be negative if instructions should be marked as frame related,
9489 zero if the %r11 register is live and cannot be freely used, and positive
9490 otherwise. */
9492 static void
9493 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9494 int style, bool set_cfa)
9496 struct machine_function *m = cfun->machine;
9497 rtx insn;
9498 bool add_frame_related_expr = false;
9500 if (! TARGET_64BIT)
9501 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9502 else if (x86_64_immediate_operand (offset, DImode))
9503 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9504 else
9506 rtx tmp;
9507 /* r11 is used by indirect sibcall return as well, set before the
9508 epilogue and used after the epilogue. */
9509 if (style)
9510 tmp = gen_rtx_REG (DImode, R11_REG);
9511 else
9513 gcc_assert (src != hard_frame_pointer_rtx
9514 && dest != hard_frame_pointer_rtx);
9515 tmp = hard_frame_pointer_rtx;
9517 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9518 if (style < 0)
9519 add_frame_related_expr = true;
9521 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9524 insn = emit_insn (insn);
9525 if (style >= 0)
9526 ix86_add_queued_cfa_restore_notes (insn);
9528 if (set_cfa)
9530 rtx r;
9532 gcc_assert (m->fs.cfa_reg == src);
9533 m->fs.cfa_offset += INTVAL (offset);
9534 m->fs.cfa_reg = dest;
9536 r = gen_rtx_PLUS (Pmode, src, offset);
9537 r = gen_rtx_SET (VOIDmode, dest, r);
9538 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9539 RTX_FRAME_RELATED_P (insn) = 1;
9541 else if (style < 0)
9543 RTX_FRAME_RELATED_P (insn) = 1;
9544 if (add_frame_related_expr)
9546 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9547 r = gen_rtx_SET (VOIDmode, dest, r);
9548 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9552 if (dest == stack_pointer_rtx)
9554 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9555 bool valid = m->fs.sp_valid;
9557 if (src == hard_frame_pointer_rtx)
9559 valid = m->fs.fp_valid;
9560 ooffset = m->fs.fp_offset;
9562 else if (src == crtl->drap_reg)
9564 valid = m->fs.drap_valid;
9565 ooffset = 0;
9567 else
9569 /* Else there are two possibilities: SP itself, which we set
9570 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9571 taken care of by hand along the eh_return path. */
9572 gcc_checking_assert (src == stack_pointer_rtx
9573 || offset == const0_rtx);
9576 m->fs.sp_offset = ooffset - INTVAL (offset);
9577 m->fs.sp_valid = valid;
9581 /* Find an available register to be used as the dynamic realign argument
9582 pointer register. Such a register will be written in the prologue and
9583 used at the beginning of the body, so it must not be
9584 1. a parameter passing register.
9585 2. the GOT pointer.
9586 We reuse the static-chain register if it is available. Otherwise, we
9587 use DI for i386 and R13 for x86-64. We chose R13 since it has a
9588 shorter encoding.
9590 Return: the regno of chosen register. */
9592 static unsigned int
9593 find_drap_reg (void)
9595 tree decl = cfun->decl;
9597 if (TARGET_64BIT)
9599 /* Use R13 for a nested function or a function that needs a static chain.
9600 Since a function with a tail call may use any caller-saved
9601 register in the epilogue, DRAP must not use a caller-saved
9602 register in that case. */
9603 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9604 return R13_REG;
9606 return R10_REG;
9608 else
9610 /* Use DI for a nested function or a function that needs a static chain.
9611 Since a function with a tail call may use any caller-saved
9612 register in the epilogue, DRAP must not use a caller-saved
9613 register in that case. */
9614 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9615 return DI_REG;
9617 /* Reuse static chain register if it isn't used for parameter
9618 passing. */
9619 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
9620 && !lookup_attribute ("fastcall",
9621 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
9622 && !lookup_attribute ("thiscall",
9623 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
9624 return CX_REG;
9625 else
9626 return DI_REG;
9630 /* Return minimum incoming stack alignment. */
9632 static unsigned int
9633 ix86_minimum_incoming_stack_boundary (bool sibcall)
9635 unsigned int incoming_stack_boundary;
9637 /* Prefer the one specified at command line. */
9638 if (ix86_user_incoming_stack_boundary)
9639 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9640 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9641 if -mstackrealign is used, this isn't a sibcall check, and the
9642 estimated stack alignment is 128 bits. */
9643 else if (!sibcall
9644 && !TARGET_64BIT
9645 && ix86_force_align_arg_pointer
9646 && crtl->stack_alignment_estimated == 128)
9647 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9648 else
9649 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9651 /* Incoming stack alignment can be changed on individual functions
9652 via force_align_arg_pointer attribute. We use the smallest
9653 incoming stack boundary. */
9654 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9655 && lookup_attribute (ix86_force_align_arg_pointer_string,
9656 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9657 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9659 /* The incoming stack frame has to be aligned at least at
9660 parm_stack_boundary. */
9661 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9662 incoming_stack_boundary = crtl->parm_stack_boundary;
9664 /* The stack at entry to main is aligned by the runtime. We use the
9665 smallest incoming stack boundary. */
9666 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9667 && DECL_NAME (current_function_decl)
9668 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9669 && DECL_FILE_SCOPE_P (current_function_decl))
9670 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9672 return incoming_stack_boundary;
9675 /* Update incoming stack boundary and estimated stack alignment. */
9677 static void
9678 ix86_update_stack_boundary (void)
9680 ix86_incoming_stack_boundary
9681 = ix86_minimum_incoming_stack_boundary (false);
9683 /* x86_64 varargs need 16-byte stack alignment for the register save
9684 area. */
9685 if (TARGET_64BIT
9686 && cfun->stdarg
9687 && crtl->stack_alignment_estimated < 128)
9688 crtl->stack_alignment_estimated = 128;
9691 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9692 needed or an rtx for DRAP otherwise. */
9694 static rtx
9695 ix86_get_drap_rtx (void)
9697 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9698 crtl->need_drap = true;
9700 if (stack_realign_drap)
9702 /* Assign DRAP to vDRAP and return vDRAP. */
9703 unsigned int regno = find_drap_reg ();
9704 rtx drap_vreg;
9705 rtx arg_ptr;
9706 rtx seq, insn;
9708 arg_ptr = gen_rtx_REG (Pmode, regno);
9709 crtl->drap_reg = arg_ptr;
9711 start_sequence ();
9712 drap_vreg = copy_to_reg (arg_ptr);
9713 seq = get_insns ();
9714 end_sequence ();
9716 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9717 if (!optimize)
9719 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9720 RTX_FRAME_RELATED_P (insn) = 1;
9722 return drap_vreg;
9724 else
9725 return NULL;
9728 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9730 static rtx
9731 ix86_internal_arg_pointer (void)
9733 return virtual_incoming_args_rtx;
9736 struct scratch_reg {
9737 rtx reg;
9738 bool saved;
9741 /* Return a short-lived scratch register for use on function entry.
9742 In 32-bit mode, it is valid only after the registers are saved
9743 in the prologue. This register must be released by means of
9744 release_scratch_register_on_entry once it is dead. */
9746 static void
9747 get_scratch_register_on_entry (struct scratch_reg *sr)
9749 int regno;
9751 sr->saved = false;
9753 if (TARGET_64BIT)
9755 /* We always use R11 in 64-bit mode. */
9756 regno = R11_REG;
9758 else
9760 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9761 bool fastcall_p
9762 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9763 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9764 int regparm = ix86_function_regparm (fntype, decl);
9765 int drap_regno
9766 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9768 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9769 for the static chain register. */
9770 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9771 && drap_regno != AX_REG)
9772 regno = AX_REG;
9773 else if (regparm < 2 && drap_regno != DX_REG)
9774 regno = DX_REG;
9775 /* ecx is the static chain register. */
9776 else if (regparm < 3 && !fastcall_p && !static_chain_p
9777 && drap_regno != CX_REG)
9778 regno = CX_REG;
9779 else if (ix86_save_reg (BX_REG, true))
9780 regno = BX_REG;
9781 /* esi is the static chain register. */
9782 else if (!(regparm == 3 && static_chain_p)
9783 && ix86_save_reg (SI_REG, true))
9784 regno = SI_REG;
9785 else if (ix86_save_reg (DI_REG, true))
9786 regno = DI_REG;
9787 else
9789 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9790 sr->saved = true;
9794 sr->reg = gen_rtx_REG (Pmode, regno);
9795 if (sr->saved)
9797 rtx insn = emit_insn (gen_push (sr->reg));
9798 RTX_FRAME_RELATED_P (insn) = 1;
9802 /* Release a scratch register obtained from the preceding function. */
9804 static void
9805 release_scratch_register_on_entry (struct scratch_reg *sr)
9807 if (sr->saved)
9809 rtx x, insn = emit_insn (gen_pop (sr->reg));
9811 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9812 RTX_FRAME_RELATED_P (insn) = 1;
9813 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9814 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9815 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9819 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9821 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9823 static void
9824 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9826 /* We skip the probe for the first interval + a small dope of 4 words and
9827 probe that many bytes past the specified size to maintain a protection
9828 area at the bottom of the stack. */
9829 const int dope = 4 * UNITS_PER_WORD;
9830 rtx size_rtx = GEN_INT (size);
9832 /* See if we have a constant small number of probes to generate. If so,
9833 that's the easy case. The run-time loop is made up of 11 insns in the
9834 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9835 for n # of intervals. */
9836 if (size <= 5 * PROBE_INTERVAL)
9838 HOST_WIDE_INT i, adjust;
9839 bool first_probe = true;
9841 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9842 values of N from 1 until it exceeds SIZE. If only one probe is
9843 needed, this will not generate any code. Then adjust and probe
9844 to PROBE_INTERVAL + SIZE. */
9845 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9847 if (first_probe)
9849 adjust = 2 * PROBE_INTERVAL + dope;
9850 first_probe = false;
9852 else
9853 adjust = PROBE_INTERVAL;
9855 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9856 plus_constant (stack_pointer_rtx, -adjust)));
9857 emit_stack_probe (stack_pointer_rtx);
9860 if (first_probe)
9861 adjust = size + PROBE_INTERVAL + dope;
9862 else
9863 adjust = size + PROBE_INTERVAL - i;
9865 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9866 plus_constant (stack_pointer_rtx, -adjust)));
9867 emit_stack_probe (stack_pointer_rtx);
9869 /* Adjust back to account for the additional first interval. */
9870 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9871 plus_constant (stack_pointer_rtx,
9872 PROBE_INTERVAL + dope)));
9875 /* Otherwise, do the same as above, but in a loop. Note that we must be
9876 extra careful with variables wrapping around because we might be at
9877 the very top (or the very bottom) of the address space and we have
9878 to be able to handle this case properly; in particular, we use an
9879 equality test for the loop condition. */
9880 else
9882 HOST_WIDE_INT rounded_size;
9883 struct scratch_reg sr;
9885 get_scratch_register_on_entry (&sr);
9888 /* Step 1: round SIZE to the previous multiple of the interval. */
9890 rounded_size = size & -PROBE_INTERVAL;
9893 /* Step 2: compute initial and final value of the loop counter. */
9895 /* SP = SP_0 + PROBE_INTERVAL. */
9896 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9897 plus_constant (stack_pointer_rtx,
9898 - (PROBE_INTERVAL + dope))));
9900 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9901 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9902 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9903 gen_rtx_PLUS (Pmode, sr.reg,
9904 stack_pointer_rtx)));
9907 /* Step 3: the loop
9909 while (SP != LAST_ADDR)
9911 SP = SP + PROBE_INTERVAL
9912 probe at SP
9915 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9916 values of N from 1 until it is equal to ROUNDED_SIZE. */
9918 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9921 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9922 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9924 if (size != rounded_size)
9926 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9927 plus_constant (stack_pointer_rtx,
9928 rounded_size - size)));
9929 emit_stack_probe (stack_pointer_rtx);
9932 /* Adjust back to account for the additional first interval. */
9933 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9934 plus_constant (stack_pointer_rtx,
9935 PROBE_INTERVAL + dope)));
9937 release_scratch_register_on_entry (&sr);
9940 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9941 cfun->machine->fs.sp_offset += size;
9943 /* Make sure nothing is scheduled before we are done. */
9944 emit_insn (gen_blockage ());
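/* Illustrative sketch, not part of i386.c: a standalone simulation of the
   unrolled (size <= 5 * PROBE_INTERVAL) path of ix86_adjust_stack_and_probe,
   printing the depths below the incoming stack pointer at which probes
   land.  The interval and word size used here are assumptions (4 KiB
   probes, 8-byte words); the real values come from
   STACK_CHECK_PROBE_INTERVAL_EXP and UNITS_PER_WORD (compile separately).  */

#include <stdio.h>

#define SIM_PROBE_INTERVAL 4096        /* assumed probe interval */
#define SIM_DOPE (4 * 8)               /* "dope" of 4 assumed 8-byte words */

static void
simulate_adjust_and_probe (long size)
{
  long sp = 0;                         /* depth below the incoming SP */
  long i, adjust;
  int first_probe = 1;

  for (i = SIM_PROBE_INTERVAL; i < size; i += SIM_PROBE_INTERVAL)
    {
      adjust = first_probe ? 2 * SIM_PROBE_INTERVAL + SIM_DOPE
                           : SIM_PROBE_INTERVAL;
      first_probe = 0;
      sp += adjust;
      printf ("probe at depth %ld\n", sp);
    }

  adjust = first_probe ? size + SIM_PROBE_INTERVAL + SIM_DOPE
                       : size + SIM_PROBE_INTERVAL - i;
  sp += adjust;
  printf ("probe at depth %ld\n", sp);

  sp -= SIM_PROBE_INTERVAL + SIM_DOPE; /* the final re-adjustment */
  printf ("final SP depth %ld (== size)\n", sp);
}

int
main (void)
{
  /* For a 6000-byte frame this prints probes at depths 8224 and 10128,
     i.e. one interval plus the dope past the requested size, and then
     a final SP depth of 6000.  */
  simulate_adjust_and_probe (6000);
  return 0;
}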
9947 /* Adjust the stack pointer up to REG while probing it. */
9949 const char *
9950 output_adjust_stack_and_probe (rtx reg)
9952 static int labelno = 0;
9953 char loop_lab[32], end_lab[32];
9954 rtx xops[2];
9956 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9957 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9959 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9961 /* Jump to END_LAB if SP == LAST_ADDR. */
9962 xops[0] = stack_pointer_rtx;
9963 xops[1] = reg;
9964 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9965 fputs ("\tje\t", asm_out_file);
9966 assemble_name_raw (asm_out_file, end_lab);
9967 fputc ('\n', asm_out_file);
9969 /* SP = SP + PROBE_INTERVAL. */
9970 xops[1] = GEN_INT (PROBE_INTERVAL);
9971 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9973 /* Probe at SP. */
9974 xops[1] = const0_rtx;
9975 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9977 fprintf (asm_out_file, "\tjmp\t");
9978 assemble_name_raw (asm_out_file, loop_lab);
9979 fputc ('\n', asm_out_file);
9981 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9983 return "";
9986 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9987 inclusive. These are offsets from the current stack pointer. */
9989 static void
9990 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9992 /* See if we have a constant small number of probes to generate. If so,
9993 that's the easy case. The run-time loop is made up of 7 insns in the
9994 generic case while the compile-time loop is made up of n insns for n #
9995 of intervals. */
9996 if (size <= 7 * PROBE_INTERVAL)
9998 HOST_WIDE_INT i;
10000 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10001 it exceeds SIZE. If only one probe is needed, this will not
10002 generate any code. Then probe at FIRST + SIZE. */
10003 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10004 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
10006 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
10009 /* Otherwise, do the same as above, but in a loop. Note that we must be
10010 extra careful with variables wrapping around because we might be at
10011 the very top (or the very bottom) of the address space and we have
10012 to be able to handle this case properly; in particular, we use an
10013 equality test for the loop condition. */
10014 else
10016 HOST_WIDE_INT rounded_size, last;
10017 struct scratch_reg sr;
10019 get_scratch_register_on_entry (&sr);
10022 /* Step 1: round SIZE to the previous multiple of the interval. */
10024 rounded_size = size & -PROBE_INTERVAL;
10027 /* Step 2: compute initial and final value of the loop counter. */
10029 /* TEST_OFFSET = FIRST. */
10030 emit_move_insn (sr.reg, GEN_INT (-first));
10032 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10033 last = first + rounded_size;
10036 /* Step 3: the loop
10038 while (TEST_ADDR != LAST_ADDR)
10040 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10041 probe at TEST_ADDR
10044 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10045 until it is equal to ROUNDED_SIZE. */
10047 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10050 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10051 that SIZE is equal to ROUNDED_SIZE. */
10053 if (size != rounded_size)
10054 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
10055 stack_pointer_rtx,
10056 sr.reg),
10057 rounded_size - size));
10059 release_scratch_register_on_entry (&sr);
10062 /* Make sure nothing is scheduled before we are done. */
10063 emit_insn (gen_blockage ());
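/* Illustrative sketch, not part of i386.c: the unrolled path of
   ix86_emit_probe_stack_range touches the offsets FIRST + N * PROBE_INTERVAL
   for N = 1, 2, ... while they stay below FIRST + SIZE, and then always
   probes FIRST + SIZE itself, which also covers a SIZE that is not a
   multiple of the interval.  Standalone restatement, assuming a 4 KiB
   interval (compile separately).  */

#include <stdio.h>

#define SIM_PROBE_INTERVAL 4096        /* assumed probe interval */

static void
simulate_probe_range (long first, long size)
{
  long i;

  for (i = SIM_PROBE_INTERVAL; i < size; i += SIM_PROBE_INTERVAL)
    printf ("probe at SP - %ld\n", first + i);

  printf ("probe at SP - %ld\n", first + size);   /* the final probe */
}

int
main (void)
{
  /* With an assumed protection area of 12288 bytes already skipped and a
     10000-byte frame, probes land at SP - 16384, SP - 20480 and SP - 22288.  */
  simulate_probe_range (12288, 10000);
  return 0;
}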
10066 /* Probe a range of stack addresses from REG to END, inclusive. These are
10067 offsets from the current stack pointer. */
10069 const char *
10070 output_probe_stack_range (rtx reg, rtx end)
10072 static int labelno = 0;
10073 char loop_lab[32], end_lab[32];
10074 rtx xops[3];
10076 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10077 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10079 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10081 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10082 xops[0] = reg;
10083 xops[1] = end;
10084 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10085 fputs ("\tje\t", asm_out_file);
10086 assemble_name_raw (asm_out_file, end_lab);
10087 fputc ('\n', asm_out_file);
10089 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10090 xops[1] = GEN_INT (PROBE_INTERVAL);
10091 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10093 /* Probe at TEST_ADDR. */
10094 xops[0] = stack_pointer_rtx;
10095 xops[1] = reg;
10096 xops[2] = const0_rtx;
10097 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10099 fprintf (asm_out_file, "\tjmp\t");
10100 assemble_name_raw (asm_out_file, loop_lab);
10101 fputc ('\n', asm_out_file);
10103 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10105 return "";
10108 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10109 to be generated in correct form. */
10110 static void
10111 ix86_finalize_stack_realign_flags (void)
10113 /* Check if stack realignment is really needed after reload, and
10114 store the result in cfun. */
10115 unsigned int incoming_stack_boundary
10116 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10117 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10118 unsigned int stack_realign = (incoming_stack_boundary
10119 < (current_function_is_leaf
10120 ? crtl->max_used_stack_slot_alignment
10121 : crtl->stack_alignment_needed));
10123 if (crtl->stack_realign_finalized)
10125 /* After stack_realign_needed is finalized, we can no longer
10126 change it. */
10127 gcc_assert (crtl->stack_realign_needed == stack_realign);
10129 else
10131 crtl->stack_realign_needed = stack_realign;
10132 crtl->stack_realign_finalized = true;
10136 /* Expand the prologue into a bunch of separate insns. */
10138 void
10139 ix86_expand_prologue (void)
10141 struct machine_function *m = cfun->machine;
10142 rtx insn, t;
10143 bool pic_reg_used;
10144 struct ix86_frame frame;
10145 HOST_WIDE_INT allocate;
10146 bool int_registers_saved;
10148 ix86_finalize_stack_realign_flags ();
10150 /* DRAP should not coexist with stack_realign_fp */
10151 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10153 memset (&m->fs, 0, sizeof (m->fs));
10155 /* Initialize CFA state for before the prologue. */
10156 m->fs.cfa_reg = stack_pointer_rtx;
10157 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10159 /* Track SP offset to the CFA. We continue tracking this after we've
10160 swapped the CFA register away from SP. In the case of re-alignment
10161 this is fudged; we're interested in offsets within the local frame. */
10162 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10163 m->fs.sp_valid = true;
10165 ix86_compute_frame_layout (&frame);
10167 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10169 /* We should have already generated an error for any use of
10170 ms_hook on a nested function. */
10171 gcc_checking_assert (!ix86_static_chain_on_stack);
10173 /* Check if profiling is active and we shall use the profiling-before-prologue
10174 variant. If so, sorry. */
10175 if (crtl->profile && flag_fentry != 0)
10176 sorry ("ms_hook_prologue attribute isn%'t compatible "
10177 "with -mfentry for 32-bit");
10179 /* In ix86_asm_output_function_label we emitted:
10180 8b ff movl.s %edi,%edi
10181 55 push %ebp
10182 8b ec movl.s %esp,%ebp
10184 This matches the hookable function prologue in Win32 API
10185 functions in Microsoft Windows XP Service Pack 2 and newer.
10186 Wine uses this to enable Windows apps to hook the Win32 API
10187 functions provided by Wine.
10189 What that means is that we've already set up the frame pointer. */
10191 if (frame_pointer_needed
10192 && !(crtl->drap_reg && crtl->stack_realign_needed))
10194 rtx push, mov;
10196 /* We've decided to use the frame pointer already set up.
10197 Describe this to the unwinder by pretending that both
10198 push and mov insns happen right here.
10200 Putting the unwind info here at the end of the ms_hook
10201 is done so that we can make absolutely certain we get
10202 the required byte sequence at the start of the function,
10203 rather than relying on an assembler that can produce
10204 the exact encoding required.
10206 However it does mean (in the unpatched case) that we have
10207 a 1 insn window where the asynchronous unwind info is
10208 incorrect. However, if we placed the unwind info at
10209 its correct location we would have incorrect unwind info
10210 in the patched case. Which is probably all moot since
10211 I don't expect Wine generates dwarf2 unwind info for the
10212 system libraries that use this feature. */
10214 insn = emit_insn (gen_blockage ());
10216 push = gen_push (hard_frame_pointer_rtx);
10217 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10218 stack_pointer_rtx);
10219 RTX_FRAME_RELATED_P (push) = 1;
10220 RTX_FRAME_RELATED_P (mov) = 1;
10222 RTX_FRAME_RELATED_P (insn) = 1;
10223 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10224 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10226 /* Note that gen_push incremented m->fs.cfa_offset, even
10227 though we didn't emit the push insn here. */
10228 m->fs.cfa_reg = hard_frame_pointer_rtx;
10229 m->fs.fp_offset = m->fs.cfa_offset;
10230 m->fs.fp_valid = true;
10232 else
10234 /* The frame pointer is not needed so pop %ebp again.
10235 This leaves us with a pristine state. */
10236 emit_insn (gen_pop (hard_frame_pointer_rtx));
10240 /* The first insn of a function that accepts its static chain on the
10241 stack is to push the register that would be filled in by a direct
10242 call. This insn will be skipped by the trampoline. */
10243 else if (ix86_static_chain_on_stack)
10245 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10246 emit_insn (gen_blockage ());
10248 /* We don't want to interpret this push insn as a register save,
10249 only as a stack adjustment. The real copy of the register as
10250 a save will be done later, if needed. */
10251 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10252 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10253 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10254 RTX_FRAME_RELATED_P (insn) = 1;
10257 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10258 DRAP is needed and stack realignment is really needed after reload. */
10259 if (stack_realign_drap)
10261 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10263 /* Only need to push parameter pointer reg if it is caller saved. */
10264 if (!call_used_regs[REGNO (crtl->drap_reg)])
10266 /* Push arg pointer reg */
10267 insn = emit_insn (gen_push (crtl->drap_reg));
10268 RTX_FRAME_RELATED_P (insn) = 1;
10271 /* Grab the argument pointer. */
10272 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10273 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10274 RTX_FRAME_RELATED_P (insn) = 1;
10275 m->fs.cfa_reg = crtl->drap_reg;
10276 m->fs.cfa_offset = 0;
10278 /* Align the stack. */
10279 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10280 stack_pointer_rtx,
10281 GEN_INT (-align_bytes)));
10282 RTX_FRAME_RELATED_P (insn) = 1;
10284 /* Replicate the return address on the stack so that the return
10285 address can be reached via the (argp - 1) slot. This is needed
10286 to implement macro RETURN_ADDR_RTX and intrinsic function
10287 expand_builtin_return_addr etc. */
10288 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10289 t = gen_frame_mem (Pmode, t);
10290 insn = emit_insn (gen_push (t));
10291 RTX_FRAME_RELATED_P (insn) = 1;
10293 /* For the purposes of frame and register save area addressing,
10294 we've started over with a new frame. */
10295 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10296 m->fs.realigned = true;
10299 if (frame_pointer_needed && !m->fs.fp_valid)
10301 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10302 slower on all targets. Also sdb doesn't like it. */
10303 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10304 RTX_FRAME_RELATED_P (insn) = 1;
10306 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10308 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10309 RTX_FRAME_RELATED_P (insn) = 1;
10311 if (m->fs.cfa_reg == stack_pointer_rtx)
10312 m->fs.cfa_reg = hard_frame_pointer_rtx;
10313 m->fs.fp_offset = m->fs.sp_offset;
10314 m->fs.fp_valid = true;
10318 int_registers_saved = (frame.nregs == 0);
10320 if (!int_registers_saved)
10322 /* If saving registers via PUSH, do so now. */
10323 if (!frame.save_regs_using_mov)
10325 ix86_emit_save_regs ();
10326 int_registers_saved = true;
10327 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10330 /* When using the red zone we may start register saving before allocating
10331 the stack frame, saving one cycle of the prologue. However, avoid
10332 doing this if we have to probe the stack; at least on x86_64 the
10333 stack probe can turn into a call that clobbers a red zone location. */
10334 else if (ix86_using_red_zone ()
10335 && (! TARGET_STACK_PROBE
10336 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10338 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10339 int_registers_saved = true;
10343 if (stack_realign_fp)
10345 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10346 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10348 /* The computation of the size of the re-aligned stack frame means
10349 that we must allocate the size of the register save area before
10350 performing the actual alignment. Otherwise we cannot guarantee
10351 that there's enough storage above the realignment point. */
10352 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10353 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10354 GEN_INT (m->fs.sp_offset
10355 - frame.sse_reg_save_offset),
10356 -1, false);
10358 /* Align the stack. */
10359 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10360 stack_pointer_rtx,
10361 GEN_INT (-align_bytes)));
10363 /* For the purposes of register save area addressing, the stack
10364 pointer is no longer valid. As for the value of sp_offset,
10365 see ix86_compute_frame_layout, which we need to match in order
10366 to pass verification of stack_pointer_offset at the end. */
10367 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10368 m->fs.sp_valid = false;
10371 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10373 if (flag_stack_usage)
10375 /* We start to count from ARG_POINTER. */
10376 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10378 /* If it was realigned, take into account the fake frame. */
10379 if (stack_realign_drap)
10381 if (ix86_static_chain_on_stack)
10382 stack_size += UNITS_PER_WORD;
10384 if (!call_used_regs[REGNO (crtl->drap_reg)])
10385 stack_size += UNITS_PER_WORD;
10387 /* This over-estimates by 1 minimal-stack-alignment-unit but
10388 mitigates that by counting in the new return address slot. */
10389 current_function_dynamic_stack_size
10390 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10393 current_function_static_stack_size = stack_size;
10396 /* The stack has already been decremented by the instruction calling us
10397 so we need to probe unconditionally to preserve the protection area. */
10398 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10400 /* We expect the registers to be saved when probes are used. */
10401 gcc_assert (int_registers_saved);
10403 if (STACK_CHECK_MOVING_SP)
10405 ix86_adjust_stack_and_probe (allocate);
10406 allocate = 0;
10408 else
10410 HOST_WIDE_INT size = allocate;
10412 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10413 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10415 if (TARGET_STACK_PROBE)
10416 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10417 else
10418 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10422 if (allocate == 0)
10424 else if (!ix86_target_stack_probe ()
10425 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10427 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10428 GEN_INT (-allocate), -1,
10429 m->fs.cfa_reg == stack_pointer_rtx);
10431 else
10433 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10434 rtx r10 = NULL;
10435 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10437 bool eax_live = false;
10438 bool r10_live = false;
10440 if (TARGET_64BIT)
10441 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10442 if (!TARGET_64BIT_MS_ABI)
10443 eax_live = ix86_eax_live_at_start_p ();
10445 if (eax_live)
10447 emit_insn (gen_push (eax));
10448 allocate -= UNITS_PER_WORD;
10450 if (r10_live)
10452 r10 = gen_rtx_REG (Pmode, R10_REG);
10453 emit_insn (gen_push (r10));
10454 allocate -= UNITS_PER_WORD;
10457 emit_move_insn (eax, GEN_INT (allocate));
10458 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10460 /* Use the fact that AX still contains ALLOCATE. */
10461 adjust_stack_insn = (TARGET_64BIT
10462 ? gen_pro_epilogue_adjust_stack_di_sub
10463 : gen_pro_epilogue_adjust_stack_si_sub);
10465 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10466 stack_pointer_rtx, eax));
10468 /* Note that SEH directives need to continue tracking the stack
10469 pointer even after the frame pointer has been set up. */
10470 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10472 if (m->fs.cfa_reg == stack_pointer_rtx)
10473 m->fs.cfa_offset += allocate;
10475 RTX_FRAME_RELATED_P (insn) = 1;
10476 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10477 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10478 plus_constant (stack_pointer_rtx,
10479 -allocate)));
10481 m->fs.sp_offset += allocate;
10483 if (r10_live && eax_live)
10485 t = choose_baseaddr (m->fs.sp_offset - allocate);
10486 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10487 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10488 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10490 else if (eax_live || r10_live)
10492 t = choose_baseaddr (m->fs.sp_offset - allocate);
10493 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10496 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10498 /* If we haven't already set up the frame pointer, do so now. */
10499 if (frame_pointer_needed && !m->fs.fp_valid)
10501 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10502 GEN_INT (frame.stack_pointer_offset
10503 - frame.hard_frame_pointer_offset));
10504 insn = emit_insn (insn);
10505 RTX_FRAME_RELATED_P (insn) = 1;
10506 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10508 if (m->fs.cfa_reg == stack_pointer_rtx)
10509 m->fs.cfa_reg = hard_frame_pointer_rtx;
10510 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10511 m->fs.fp_valid = true;
10514 if (!int_registers_saved)
10515 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10516 if (frame.nsseregs)
10517 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10519 pic_reg_used = false;
10520 if (pic_offset_table_rtx
10521 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10522 || crtl->profile))
10524 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10526 if (alt_pic_reg_used != INVALID_REGNUM)
10527 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10529 pic_reg_used = true;
10532 if (pic_reg_used)
10534 if (TARGET_64BIT)
10536 if (ix86_cmodel == CM_LARGE_PIC)
10538 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10539 rtx label = gen_label_rtx ();
10540 emit_label (label);
10541 LABEL_PRESERVE_P (label) = 1;
10542 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10543 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10544 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10545 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10546 pic_offset_table_rtx, tmp_reg));
10548 else
10549 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10551 else
10552 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10555 /* In the pic_reg_used case, make sure that the got load isn't deleted
10556 when mcount needs it. Blockage to avoid call movement across mcount
10557 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10558 note. */
10559 if (crtl->profile && !flag_fentry && pic_reg_used)
10560 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10562 if (crtl->drap_reg && !crtl->stack_realign_needed)
10564 /* vDRAP is set up, but after reload it turns out stack realignment
10565 isn't necessary; here we emit prologue code to set up DRAP
10566 without the stack realignment adjustment. */
10567 t = choose_baseaddr (0);
10568 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10571 /* Prevent instructions from being scheduled into the register save push
10572 sequence when access to the red-zone area is done through the frame pointer.
10573 The offset between the frame pointer and the stack pointer is calculated
10574 relative to the value of the stack pointer at the end of the function
10575 prologue, and moving instructions that access the red-zone area via the frame
10576 pointer inside the push sequence violates this assumption. */
10577 if (frame_pointer_needed && frame.red_zone_size)
10578 emit_insn (gen_memory_blockage ());
10580 /* Emit cld instruction if stringops are used in the function. */
10581 if (TARGET_CLD && ix86_current_function_needs_cld)
10582 emit_insn (gen_cld ());
10584 /* SEH requires that the prologue end within 256 bytes of the start of
10585 the function. Prevent instruction schedules that would extend that. */
10586 if (TARGET_SEH)
10587 emit_insn (gen_blockage ());
10590 /* Emit code to restore REG using a POP insn. */
10592 static void
10593 ix86_emit_restore_reg_using_pop (rtx reg)
10595 struct machine_function *m = cfun->machine;
10596 rtx insn = emit_insn (gen_pop (reg));
10598 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10599 m->fs.sp_offset -= UNITS_PER_WORD;
10601 if (m->fs.cfa_reg == crtl->drap_reg
10602 && REGNO (reg) == REGNO (crtl->drap_reg))
10604 /* Previously we'd represented the CFA as an expression
10605 like *(%ebp - 8). We've just popped that value from
10606 the stack, which means we need to reset the CFA to
10607 the drap register. This will remain until we restore
10608 the stack pointer. */
10609 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10610 RTX_FRAME_RELATED_P (insn) = 1;
10612 /* This means that the DRAP register is valid for addressing too. */
10613 m->fs.drap_valid = true;
10614 return;
10617 if (m->fs.cfa_reg == stack_pointer_rtx)
10619 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10620 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10621 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10622 RTX_FRAME_RELATED_P (insn) = 1;
10624 m->fs.cfa_offset -= UNITS_PER_WORD;
10627 /* When the frame pointer is the CFA, and we pop it, we are
10628 swapping back to the stack pointer as the CFA. This happens
10629 for stack frames that don't allocate other data, so we assume
10630 the stack pointer is now pointing at the return address, i.e.
10631 the function entry state, which makes the offset one word. */
10632 if (reg == hard_frame_pointer_rtx)
10634 m->fs.fp_valid = false;
10635 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10637 m->fs.cfa_reg = stack_pointer_rtx;
10638 m->fs.cfa_offset -= UNITS_PER_WORD;
10640 add_reg_note (insn, REG_CFA_DEF_CFA,
10641 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10642 GEN_INT (m->fs.cfa_offset)));
10643 RTX_FRAME_RELATED_P (insn) = 1;
10648 /* Emit code to restore saved registers using POP insns. */
10650 static void
10651 ix86_emit_restore_regs_using_pop (void)
10653 unsigned int regno;
10655 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10656 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10657 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10660 /* Emit code and notes for the LEAVE instruction. */
10662 static void
10663 ix86_emit_leave (void)
10665 struct machine_function *m = cfun->machine;
10666 rtx insn = emit_insn (ix86_gen_leave ());
10668 ix86_add_queued_cfa_restore_notes (insn);
10670 gcc_assert (m->fs.fp_valid);
10671 m->fs.sp_valid = true;
10672 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10673 m->fs.fp_valid = false;
10675 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10677 m->fs.cfa_reg = stack_pointer_rtx;
10678 m->fs.cfa_offset = m->fs.sp_offset;
10680 add_reg_note (insn, REG_CFA_DEF_CFA,
10681 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10682 RTX_FRAME_RELATED_P (insn) = 1;
10683 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10684 m->fs.fp_offset);
10688 /* Emit code to restore saved registers using MOV insns.
10689 First register is restored from CFA - CFA_OFFSET. */
10690 static void
10691 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10692 int maybe_eh_return)
10694 struct machine_function *m = cfun->machine;
10695 unsigned int regno;
10697 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10698 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10700 rtx reg = gen_rtx_REG (Pmode, regno);
10701 rtx insn, mem;
10703 mem = choose_baseaddr (cfa_offset);
10704 mem = gen_frame_mem (Pmode, mem);
10705 insn = emit_move_insn (reg, mem);
10707 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10709 /* Previously we'd represented the CFA as an expression
10710 like *(%ebp - 8). We've just loaded that value from
10711 the stack, which means we need to reset the CFA to
10712 the drap register. This will remain until we restore
10713 the stack pointer. */
10714 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10715 RTX_FRAME_RELATED_P (insn) = 1;
10717 /* This means that the DRAP register is valid for addressing. */
10718 m->fs.drap_valid = true;
10720 else
10721 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10723 cfa_offset -= UNITS_PER_WORD;
10727 /* Emit code to restore saved SSE registers using MOV insns.
10728 First register is restored from CFA - CFA_OFFSET. */
10729 static void
10730 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10731 int maybe_eh_return)
10733 unsigned int regno;
10735 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10736 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10738 rtx reg = gen_rtx_REG (V4SFmode, regno);
10739 rtx mem;
10741 mem = choose_baseaddr (cfa_offset);
10742 mem = gen_rtx_MEM (V4SFmode, mem);
10743 set_mem_align (mem, 128);
10744 emit_move_insn (reg, mem);
10746 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10748 cfa_offset -= 16;
10752 /* Restore function stack, frame, and registers. */
10754 void
10755 ix86_expand_epilogue (int style)
10757 struct machine_function *m = cfun->machine;
10758 struct machine_frame_state frame_state_save = m->fs;
10759 struct ix86_frame frame;
10760 bool restore_regs_via_mov;
10761 bool using_drap;
10763 ix86_finalize_stack_realign_flags ();
10764 ix86_compute_frame_layout (&frame);
10766 m->fs.sp_valid = (!frame_pointer_needed
10767 || (current_function_sp_is_unchanging
10768 && !stack_realign_fp));
10769 gcc_assert (!m->fs.sp_valid
10770 || m->fs.sp_offset == frame.stack_pointer_offset);
10772 /* The FP must be valid if the frame pointer is present. */
10773 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10774 gcc_assert (!m->fs.fp_valid
10775 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10777 /* We must have *some* valid pointer to the stack frame. */
10778 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10780 /* The DRAP is never valid at this point. */
10781 gcc_assert (!m->fs.drap_valid);
10783 /* See the comment about red zone and frame
10784 pointer usage in ix86_expand_prologue. */
10785 if (frame_pointer_needed && frame.red_zone_size)
10786 emit_insn (gen_memory_blockage ());
10788 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10789 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10791 /* Determine the CFA offset of the end of the red-zone. */
10792 m->fs.red_zone_offset = 0;
10793 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10795 /* The red-zone begins below the return address. */
10796 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10798 /* When the register save area is in the aligned portion of
10799 the stack, determine the maximum runtime displacement that
10800 matches up with the aligned frame. */
10801 if (stack_realign_drap)
10802 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10803 + UNITS_PER_WORD);
10806 /* Special care must be taken for the normal return case of a function
10807 using eh_return: the eax and edx registers are marked as saved, but
10808 not restored along this path. Adjust the save location to match. */
10809 if (crtl->calls_eh_return && style != 2)
10810 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10812 /* EH_RETURN requires the use of moves to function properly. */
10813 if (crtl->calls_eh_return)
10814 restore_regs_via_mov = true;
10815 /* SEH requires the use of pops to identify the epilogue. */
10816 else if (TARGET_SEH)
10817 restore_regs_via_mov = false;
10818 /* If we're only restoring one register and sp is not valid then
10819 use a move instruction to restore the register, since it's
10820 less work than reloading sp and popping the register. */
10821 else if (!m->fs.sp_valid && frame.nregs <= 1)
10822 restore_regs_via_mov = true;
10823 else if (TARGET_EPILOGUE_USING_MOVE
10824 && cfun->machine->use_fast_prologue_epilogue
10825 && (frame.nregs > 1
10826 || m->fs.sp_offset != frame.reg_save_offset))
10827 restore_regs_via_mov = true;
10828 else if (frame_pointer_needed
10829 && !frame.nregs
10830 && m->fs.sp_offset != frame.reg_save_offset)
10831 restore_regs_via_mov = true;
10832 else if (frame_pointer_needed
10833 && TARGET_USE_LEAVE
10834 && cfun->machine->use_fast_prologue_epilogue
10835 && frame.nregs == 1)
10836 restore_regs_via_mov = true;
10837 else
10838 restore_regs_via_mov = false;
10840 if (restore_regs_via_mov || frame.nsseregs)
10842 /* Ensure that the entire register save area is addressable via
10843 the stack pointer, if we will restore via sp. */
10844 if (TARGET_64BIT
10845 && m->fs.sp_offset > 0x7fffffff
10846 && !(m->fs.fp_valid || m->fs.drap_valid)
10847 && (frame.nsseregs + frame.nregs) != 0)
10849 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10850 GEN_INT (m->fs.sp_offset
10851 - frame.sse_reg_save_offset),
10852 style,
10853 m->fs.cfa_reg == stack_pointer_rtx);
10857 /* If there are any SSE registers to restore, then we have to do it
10858 via moves, since there's obviously no pop for SSE regs. */
10859 if (frame.nsseregs)
10860 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10861 style == 2);
10863 if (restore_regs_via_mov)
10865 rtx t;
10867 if (frame.nregs)
10868 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10870 /* eh_return epilogues need %ecx added to the stack pointer. */
10871 if (style == 2)
10873 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10875 /* Stack align doesn't work with eh_return. */
10876 gcc_assert (!stack_realign_drap);
10877 /* Neither do regparm nested functions. */
10878 gcc_assert (!ix86_static_chain_on_stack);
10880 if (frame_pointer_needed)
10882 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10883 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10884 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10886 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10887 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10889 /* Note that we use SA as a temporary CFA, as the return
10890 address is at the proper place relative to it. We
10891 pretend this happens at the FP restore insn because
10892 prior to this insn the FP would be stored at the wrong
10893 offset relative to SA, and after this insn we have no
10894 other reasonable register to use for the CFA. We don't
10895 bother resetting the CFA to the SP for the duration of
10896 the return insn. */
10897 add_reg_note (insn, REG_CFA_DEF_CFA,
10898 plus_constant (sa, UNITS_PER_WORD));
10899 ix86_add_queued_cfa_restore_notes (insn);
10900 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10901 RTX_FRAME_RELATED_P (insn) = 1;
10903 m->fs.cfa_reg = sa;
10904 m->fs.cfa_offset = UNITS_PER_WORD;
10905 m->fs.fp_valid = false;
10907 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10908 const0_rtx, style, false);
10910 else
10912 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10913 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10914 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10915 ix86_add_queued_cfa_restore_notes (insn);
10917 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10918 if (m->fs.cfa_offset != UNITS_PER_WORD)
10920 m->fs.cfa_offset = UNITS_PER_WORD;
10921 add_reg_note (insn, REG_CFA_DEF_CFA,
10922 plus_constant (stack_pointer_rtx,
10923 UNITS_PER_WORD));
10924 RTX_FRAME_RELATED_P (insn) = 1;
10927 m->fs.sp_offset = UNITS_PER_WORD;
10928 m->fs.sp_valid = true;
10931 else
10933 /* SEH requires that the function end with (1) a stack adjustment
10934 if necessary, (2) a sequence of pops, and (3) a return or
10935 jump instruction. Prevent insns from the function body from
10936 being scheduled into this sequence. */
10937 if (TARGET_SEH)
10939 /* Prevent a catch region from being adjacent to the standard
10940 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
10941 several other flags that would be interesting to test are
10942 set up yet. */
10943 if (flag_non_call_exceptions)
10944 emit_insn (gen_nops (const1_rtx));
10945 else
10946 emit_insn (gen_blockage ());
10949 /* First step is to deallocate the stack frame so that we can
10950 pop the registers. */
10951 if (!m->fs.sp_valid)
10953 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10954 GEN_INT (m->fs.fp_offset
10955 - frame.reg_save_offset),
10956 style, false);
10958 else if (m->fs.sp_offset != frame.reg_save_offset)
10960 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10961 GEN_INT (m->fs.sp_offset
10962 - frame.reg_save_offset),
10963 style,
10964 m->fs.cfa_reg == stack_pointer_rtx);
10967 ix86_emit_restore_regs_using_pop ();
10970 /* If we used a frame pointer and haven't already got rid of it,
10971 then do so now. */
10972 if (m->fs.fp_valid)
10974 /* If the stack pointer is valid and pointing at the frame
10975 pointer store address, then we only need a pop. */
10976 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10977 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10978 /* Leave results in shorter dependency chains on CPUs that are
10979 able to grok it fast. */
10980 else if (TARGET_USE_LEAVE
10981 || optimize_function_for_size_p (cfun)
10982 || !cfun->machine->use_fast_prologue_epilogue)
10983 ix86_emit_leave ();
10984 else
10986 pro_epilogue_adjust_stack (stack_pointer_rtx,
10987 hard_frame_pointer_rtx,
10988 const0_rtx, style, !using_drap);
10989 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10993 if (using_drap)
10995 int param_ptr_offset = UNITS_PER_WORD;
10996 rtx insn;
10998 gcc_assert (stack_realign_drap);
11000 if (ix86_static_chain_on_stack)
11001 param_ptr_offset += UNITS_PER_WORD;
11002 if (!call_used_regs[REGNO (crtl->drap_reg)])
11003 param_ptr_offset += UNITS_PER_WORD;
11005 insn = emit_insn (gen_rtx_SET
11006 (VOIDmode, stack_pointer_rtx,
11007 gen_rtx_PLUS (Pmode,
11008 crtl->drap_reg,
11009 GEN_INT (-param_ptr_offset))));
11010 m->fs.cfa_reg = stack_pointer_rtx;
11011 m->fs.cfa_offset = param_ptr_offset;
11012 m->fs.sp_offset = param_ptr_offset;
11013 m->fs.realigned = false;
11015 add_reg_note (insn, REG_CFA_DEF_CFA,
11016 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11017 GEN_INT (param_ptr_offset)));
11018 RTX_FRAME_RELATED_P (insn) = 1;
11020 if (!call_used_regs[REGNO (crtl->drap_reg)])
11021 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11024 /* At this point the stack pointer must be valid, and we must have
11025 restored all of the registers. We may not have deallocated the
11026 entire stack frame. We've delayed this until now because it may
11027 be possible to merge the local stack deallocation with the
11028 deallocation forced by ix86_static_chain_on_stack. */
11029 gcc_assert (m->fs.sp_valid);
11030 gcc_assert (!m->fs.fp_valid);
11031 gcc_assert (!m->fs.realigned);
11032 if (m->fs.sp_offset != UNITS_PER_WORD)
11034 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11035 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11036 style, true);
11039 /* Sibcall epilogues don't want a return instruction. */
11040 if (style == 0)
11042 m->fs = frame_state_save;
11043 return;
11046 /* Emit vzeroupper if needed. */
11047 if (TARGET_VZEROUPPER
11048 && !TREE_THIS_VOLATILE (cfun->decl)
11049 && !cfun->machine->caller_return_avx256_p)
11050 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
11052 if (crtl->args.pops_args && crtl->args.size)
11054 rtx popc = GEN_INT (crtl->args.pops_args);
11056 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11057 address, do explicit add, and jump indirectly to the caller. */
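/* The 64K limit comes from the return instruction itself: `ret imm16' takes
a 16-bit immediate and so can pop at most 65535 bytes of arguments, which
is why anything larger takes the indirect-return path below. */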
11059 if (crtl->args.pops_args >= 65536)
11061 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11062 rtx insn;
11064 /* There is no "pascal" calling convention in any 64bit ABI. */
11065 gcc_assert (!TARGET_64BIT);
11067 insn = emit_insn (gen_pop (ecx));
11068 m->fs.cfa_offset -= UNITS_PER_WORD;
11069 m->fs.sp_offset -= UNITS_PER_WORD;
11071 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11072 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11073 add_reg_note (insn, REG_CFA_REGISTER,
11074 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11075 RTX_FRAME_RELATED_P (insn) = 1;
11077 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11078 popc, -1, true);
11079 emit_jump_insn (gen_return_indirect_internal (ecx));
11081 else
11082 emit_jump_insn (gen_return_pop_internal (popc));
11084 else
11085 emit_jump_insn (gen_return_internal ());
11087 /* Restore the state back to the state from the prologue,
11088 so that it's correct for the next epilogue. */
11089 m->fs = frame_state_save;
11092 /* Reset from the function's potential modifications. */
11094 static void
11095 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11096 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11098 if (pic_offset_table_rtx)
11099 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11100 #if TARGET_MACHO
11101 /* Mach-O doesn't support labels at the end of objects, so if
11102 it looks like we might want one, insert a NOP. */
11104 rtx insn = get_last_insn ();
11105 while (insn
11106 && NOTE_P (insn)
11107 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11108 insn = PREV_INSN (insn);
11109 if (insn
11110 && (LABEL_P (insn)
11111 || (NOTE_P (insn)
11112 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11113 fputs ("\tnop\n", file);
11115 #endif
11119 /* Return a scratch register to use in the split stack prologue. The
11120 split stack prologue is used for -fsplit-stack. It contains the first
11121 instructions in the function, emitted even before the regular prologue.
11122 The scratch register can be any caller-saved register which is not
11123 used for parameters or for the static chain. */
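/* In outline (a summary of the body below): 64-bit code always gets %r11;
32-bit code gets %eax for fastcall functions and otherwise %ecx, falling
back to %edx when the static chain already occupies %ecx; when no suitable
register is free we sorry and return INVALID_REGNUM. */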
11125 static unsigned int
11126 split_stack_prologue_scratch_regno (void)
11128 if (TARGET_64BIT)
11129 return R11_REG;
11130 else
11132 bool is_fastcall;
11133 int regparm;
11135 is_fastcall = (lookup_attribute ("fastcall",
11136 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11137 != NULL);
11138 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11140 if (is_fastcall)
11142 if (DECL_STATIC_CHAIN (cfun->decl))
11144 sorry ("-fsplit-stack does not support fastcall with "
11145 "nested function");
11146 return INVALID_REGNUM;
11148 return AX_REG;
11150 else if (regparm < 3)
11152 if (!DECL_STATIC_CHAIN (cfun->decl))
11153 return CX_REG;
11154 else
11156 if (regparm >= 2)
11158 sorry ("-fsplit-stack does not support 2 register "
11159 "parameters for a nested function");
11160 return INVALID_REGNUM;
11162 return DX_REG;
11165 else
11167 /* FIXME: We could make this work by pushing a register
11168 around the addition and comparison. */
11169 sorry ("-fsplit-stack does not support 3 register parameters");
11170 return INVALID_REGNUM;
11175 /* A SYMBOL_REF for the function which allocates new stack space for
11176 -fsplit-stack. */
11178 static GTY(()) rtx split_stack_fn;
11180 /* A SYMBOL_REF for the more-stack function to use with the large
11181 model. */
11183 static GTY(()) rtx split_stack_fn_large;
11185 /* Handle -fsplit-stack. These are the first instructions in the
11186 function, even before the regular prologue. */
11188 void
11189 ix86_expand_split_stack_prologue (void)
11191 struct ix86_frame frame;
11192 HOST_WIDE_INT allocate;
11193 unsigned HOST_WIDE_INT args_size;
11194 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11195 rtx scratch_reg = NULL_RTX;
11196 rtx varargs_label = NULL_RTX;
11197 rtx fn;
11199 gcc_assert (flag_split_stack && reload_completed);
11201 ix86_finalize_stack_realign_flags ();
11202 ix86_compute_frame_layout (&frame);
11203 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11205 /* This is the label we will branch to if we have enough stack
11206 space. We expect the basic block reordering pass to reverse this
11207 branch if optimizing, so that we branch in the unlikely case. */
11208 label = gen_label_rtx ();
11210 /* We need to compare the stack pointer minus the frame size with
11211 the stack boundary in the TCB. The stack boundary always gives
11212 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11213 can compare directly. Otherwise we need to do an addition. */
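/* Roughly: branch to LABEL (stay on the current stack) when
CURRENT >= LIMIT, where CURRENT is either the stack pointer itself or the
stack pointer minus the frame size computed into a scratch register, and
LIMIT is the stack boundary loaded from the TCB slot; otherwise fall
through into the __morestack call emitted below. */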
11215 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11216 UNSPEC_STACK_CHECK);
11217 limit = gen_rtx_CONST (Pmode, limit);
11218 limit = gen_rtx_MEM (Pmode, limit);
11219 if (allocate < SPLIT_STACK_AVAILABLE)
11220 current = stack_pointer_rtx;
11221 else
11223 unsigned int scratch_regno;
11224 rtx offset;
11226 /* We need a scratch register to hold the stack pointer minus
11227 the required frame size. Since this is the very start of the
11228 function, the scratch register can be any caller-saved
11229 register which is not used for parameters. */
11230 offset = GEN_INT (- allocate);
11231 scratch_regno = split_stack_prologue_scratch_regno ();
11232 if (scratch_regno == INVALID_REGNUM)
11233 return;
11234 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11235 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11237 /* We don't use ix86_gen_add3 in this case because it will
11238 want to split to lea, but when not optimizing the insn
11239 will not be split after this point. */
11240 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11241 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11242 offset)));
11244 else
11246 emit_move_insn (scratch_reg, offset);
11247 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11248 stack_pointer_rtx));
11250 current = scratch_reg;
11253 ix86_expand_branch (GEU, current, limit, label);
11254 jump_insn = get_last_insn ();
11255 JUMP_LABEL (jump_insn) = label;
11257 /* Mark the jump as very likely to be taken. */
11258 add_reg_note (jump_insn, REG_BR_PROB,
11259 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
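/* REG_BR_PROB_BASE is 10000, so the note above records a 99% probability
that the branch is taken. */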
11261 if (split_stack_fn == NULL_RTX)
11262 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11263 fn = split_stack_fn;
11265 /* Get more stack space. We pass in the desired stack space and the
11266 size of the arguments to copy to the new stack. In 32-bit mode
11267 we push the parameters; __morestack will return on a new stack
11268 anyhow. In 64-bit mode we pass the parameters in r10 and
11269 r11. */
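/* Concretely, in the ordinary 64-bit case below %r10 gets the frame size
to allocate and %r11 the argument size; the large-model case instead
packs both values into %r10. */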
11270 allocate_rtx = GEN_INT (allocate);
11271 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11272 call_fusage = NULL_RTX;
11273 if (TARGET_64BIT)
11275 rtx reg10, reg11;
11277 reg10 = gen_rtx_REG (Pmode, R10_REG);
11278 reg11 = gen_rtx_REG (Pmode, R11_REG);
11280 /* If this function uses a static chain, it will be in %r10.
11281 Preserve it across the call to __morestack. */
11282 if (DECL_STATIC_CHAIN (cfun->decl))
11284 rtx rax;
11286 rax = gen_rtx_REG (Pmode, AX_REG);
11287 emit_move_insn (rax, reg10);
11288 use_reg (&call_fusage, rax);
11291 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11293 HOST_WIDE_INT argval;
11295 /* When using the large model we need to load the address
11296 into a register, and we've run out of registers. So we
11297 switch to a different calling convention, and we call a
11298 different function: __morestack_large_model. We pass the
11299 argument size in the upper 32 bits of r10 and pass the
11300 frame size in the lower 32 bits. */
11301 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11302 gcc_assert ((args_size & 0xffffffff) == args_size);
11304 if (split_stack_fn_large == NULL_RTX)
11305 split_stack_fn_large =
11306 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11308 if (ix86_cmodel == CM_LARGE_PIC)
11310 rtx label, x;
11312 label = gen_label_rtx ();
11313 emit_label (label);
11314 LABEL_PRESERVE_P (label) = 1;
11315 emit_insn (gen_set_rip_rex64 (reg10, label));
11316 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11317 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11318 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11319 UNSPEC_GOT);
11320 x = gen_rtx_CONST (Pmode, x);
11321 emit_move_insn (reg11, x);
11322 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11323 x = gen_const_mem (Pmode, x);
11324 emit_move_insn (reg11, x);
11326 else
11327 emit_move_insn (reg11, split_stack_fn_large);
11329 fn = reg11;
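/* The double 16-bit shift below is simply a 32-bit left shift; e.g. with
args_size 0x10 and allocate 0x2000, argval becomes 0x0000001000002000,
from which __morestack_large_model recovers both halves. */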
11331 argval = ((args_size << 16) << 16) + allocate;
11332 emit_move_insn (reg10, GEN_INT (argval));
11334 else
11336 emit_move_insn (reg10, allocate_rtx);
11337 emit_move_insn (reg11, GEN_INT (args_size));
11338 use_reg (&call_fusage, reg11);
11341 use_reg (&call_fusage, reg10);
11343 else
11345 emit_insn (gen_push (GEN_INT (args_size)));
11346 emit_insn (gen_push (allocate_rtx));
11348 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11349 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11350 NULL_RTX, 0);
11351 add_function_usage_to (call_insn, call_fusage);
11353 /* In order to make call/return prediction work right, we now need
11354 to execute a return instruction. See
11355 libgcc/config/i386/morestack.S for the details on how this works.
11357 For flow purposes gcc must not see this as a return
11358 instruction--we need control flow to continue at the subsequent
11359 label. Therefore, we use an unspec. */
11360 gcc_assert (crtl->args.pops_args < 65536);
11361 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11363 /* If we are in 64-bit mode and this function uses a static chain,
11364 we saved %r10 in %rax before calling __morestack. */
11365 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11366 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11367 gen_rtx_REG (Pmode, AX_REG));
11369 /* If this function calls va_start, we need to store a pointer to
11370 the arguments on the old stack, because they may not have been
11371 all copied to the new stack. At this point the old stack can be
11372 found at the frame pointer value used by __morestack, because
11373 __morestack has set that up before calling back to us. Here we
11374 store that pointer in a scratch register, and in
11375 ix86_expand_prologue we store the scratch register in a stack
11376 slot. */
11377 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11379 unsigned int scratch_regno;
11380 rtx frame_reg;
11381 int words;
11383 scratch_regno = split_stack_prologue_scratch_regno ();
11384 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11385 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11387 /* 64-bit:
11388 fp -> old fp value
11389 return address within this function
11390 return address of caller of this function
11391 stack arguments
11392 So we add three words to get to the stack arguments.
11394 32-bit:
11395 fp -> old fp value
11396 return address within this function
11397 first argument to __morestack
11398 second argument to __morestack
11399 return address of caller of this function
11400 stack arguments
11401 So we add five words to get to the stack arguments. */
11403 words = TARGET_64BIT ? 3 : 5;
11404 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11405 gen_rtx_PLUS (Pmode, frame_reg,
11406 GEN_INT (words * UNITS_PER_WORD))));
11408 varargs_label = gen_label_rtx ();
11409 emit_jump_insn (gen_jump (varargs_label));
11410 JUMP_LABEL (get_last_insn ()) = varargs_label;
11412 emit_barrier ();
11415 emit_label (label);
11416 LABEL_NUSES (label) = 1;
11418 /* If this function calls va_start, we now have to set the scratch
11419 register for the case where we do not call __morestack. In this
11420 case we need to set it based on the stack pointer. */
11421 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11423 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11424 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11425 GEN_INT (UNITS_PER_WORD))));
11427 emit_label (varargs_label);
11428 LABEL_NUSES (varargs_label) = 1;
11432 /* We may have to tell the dataflow pass that the split stack prologue
11433 is initializing a scratch register. */
11435 static void
11436 ix86_live_on_entry (bitmap regs)
11438 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11440 gcc_assert (flag_split_stack);
11441 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11445 /* Extract the parts of an RTL expression that is a valid memory address
11446 for an instruction. Return 0 if the structure of the address is
11447 grossly off. Return -1 if the address contains ASHIFT, so it is not
11448 strictly valid, but is still used for computing the length of an lea instruction. */
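/* For reference, the canonical form handled here is
base + index*scale + disp with an optional segment override; e.g.
(plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8))
decomposes into base = reg, index = reg, scale = 4, disp = 8. */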
11451 ix86_decompose_address (rtx addr, struct ix86_address *out)
11453 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11454 rtx base_reg, index_reg;
11455 HOST_WIDE_INT scale = 1;
11456 rtx scale_rtx = NULL_RTX;
11457 rtx tmp;
11458 int retval = 1;
11459 enum ix86_address_seg seg = SEG_DEFAULT;
11461 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
11462 base = addr;
11463 else if (GET_CODE (addr) == PLUS)
11465 rtx addends[4], op;
11466 int n = 0, i;
11468 op = addr;
11471 if (n >= 4)
11472 return 0;
11473 addends[n++] = XEXP (op, 1);
11474 op = XEXP (op, 0);
11476 while (GET_CODE (op) == PLUS);
11477 if (n >= 4)
11478 return 0;
11479 addends[n] = op;
11481 for (i = n; i >= 0; --i)
11483 op = addends[i];
11484 switch (GET_CODE (op))
11486 case MULT:
11487 if (index)
11488 return 0;
11489 index = XEXP (op, 0);
11490 scale_rtx = XEXP (op, 1);
11491 break;
11493 case ASHIFT:
11494 if (index)
11495 return 0;
11496 index = XEXP (op, 0);
11497 tmp = XEXP (op, 1);
11498 if (!CONST_INT_P (tmp))
11499 return 0;
11500 scale = INTVAL (tmp);
11501 if ((unsigned HOST_WIDE_INT) scale > 3)
11502 return 0;
11503 scale = 1 << scale;
11504 break;
11506 case UNSPEC:
11507 if (XINT (op, 1) == UNSPEC_TP
11508 && TARGET_TLS_DIRECT_SEG_REFS
11509 && seg == SEG_DEFAULT)
11510 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11511 else
11512 return 0;
11513 break;
11515 case REG:
11516 case SUBREG:
11517 if (!base)
11518 base = op;
11519 else if (!index)
11520 index = op;
11521 else
11522 return 0;
11523 break;
11525 case CONST:
11526 case CONST_INT:
11527 case SYMBOL_REF:
11528 case LABEL_REF:
11529 if (disp)
11530 return 0;
11531 disp = op;
11532 break;
11534 default:
11535 return 0;
11539 else if (GET_CODE (addr) == MULT)
11541 index = XEXP (addr, 0); /* index*scale */
11542 scale_rtx = XEXP (addr, 1);
11544 else if (GET_CODE (addr) == ASHIFT)
11546 /* We're called for lea too, which implements ashift on occasion. */
11547 index = XEXP (addr, 0);
11548 tmp = XEXP (addr, 1);
11549 if (!CONST_INT_P (tmp))
11550 return 0;
11551 scale = INTVAL (tmp);
11552 if ((unsigned HOST_WIDE_INT) scale > 3)
11553 return 0;
11554 scale = 1 << scale;
11555 retval = -1;
11557 else
11558 disp = addr; /* displacement */
11560 /* Extract the integral value of scale. */
11561 if (scale_rtx)
11563 if (!CONST_INT_P (scale_rtx))
11564 return 0;
11565 scale = INTVAL (scale_rtx);
11568 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11569 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11571 /* Avoid useless 0 displacement. */
11572 if (disp == const0_rtx && (base || index))
11573 disp = NULL_RTX;
11575 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11576 if (base_reg && index_reg && scale == 1
11577 && (index_reg == arg_pointer_rtx
11578 || index_reg == frame_pointer_rtx
11579 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11581 rtx tmp;
11582 tmp = base, base = index, index = tmp;
11583 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11586 /* Special case: %ebp cannot be encoded as a base without a displacement.
11587 Similarly %r13. */
11588 if (!disp
11589 && base_reg
11590 && (base_reg == hard_frame_pointer_rtx
11591 || base_reg == frame_pointer_rtx
11592 || base_reg == arg_pointer_rtx
11593 || (REG_P (base_reg)
11594 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11595 || REGNO (base_reg) == R13_REG))))
11596 disp = const0_rtx;
11598 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
11599 Avoid this by transforming to [%esi+0].
11600 Reload calls address legitimization without cfun defined, so we need
11601 to test cfun for being non-NULL. */
11602 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11603 && base_reg && !index_reg && !disp
11604 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11605 disp = const0_rtx;
11607 /* Special case: encode reg+reg instead of reg*2. */
11608 if (!base && index && scale == 2)
11609 base = index, base_reg = index_reg, scale = 1;
11611 /* Special case: scaling cannot be encoded without base or displacement. */
11612 if (!base && !disp && index && scale != 1)
11613 disp = const0_rtx;
11615 out->base = base;
11616 out->index = index;
11617 out->disp = disp;
11618 out->scale = scale;
11619 out->seg = seg;
11621 return retval;
11624 /* Return cost of the memory address x.
11625 For i386, it is better to use a complex address than let gcc copy
11626 the address into a reg and make a new pseudo. But not if the address
11627 requires two regs - that would mean more pseudos with longer
11628 lifetimes. */
11629 static int
11630 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11632 struct ix86_address parts;
11633 int cost = 1;
11634 int ok = ix86_decompose_address (x, &parts);
11636 gcc_assert (ok);
11638 if (parts.base && GET_CODE (parts.base) == SUBREG)
11639 parts.base = SUBREG_REG (parts.base);
11640 if (parts.index && GET_CODE (parts.index) == SUBREG)
11641 parts.index = SUBREG_REG (parts.index);
11643 /* Attempt to minimize number of registers in the address. */
11644 if ((parts.base
11645 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11646 || (parts.index
11647 && (!REG_P (parts.index)
11648 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11649 cost++;
11651 if (parts.base
11652 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11653 && parts.index
11654 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11655 && parts.base != parts.index)
11656 cost++;
11658 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11659 since its predecode logic can't detect the length of instructions
11660 and decoding degenerates to vector decoding. Increase the cost of such
11661 addresses here. The penalty is at least 2 cycles. It may be worthwhile
11662 to split such addresses or even refuse such addresses at all.
11664 The following addressing modes are affected:
11665 [base+scale*index]
11666 [scale*index+disp]
11667 [base+index]
11669 The first and last cases may be avoidable by explicitly coding the zero in
11670 the memory address, but I don't have an AMD-K6 machine handy to check this
11671 theory. */
11673 if (TARGET_K6
11674 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11675 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11676 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11677 cost += 10;
11679 return cost;
11682 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11683 this is used to form addresses of local data when -fPIC is in
11684 use. */
11686 static bool
11687 darwin_local_data_pic (rtx disp)
11689 return (GET_CODE (disp) == UNSPEC
11690 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11693 /* Determine if a given RTX is a valid constant. We already know this
11694 satisfies CONSTANT_P. */
11696 bool
11697 legitimate_constant_p (rtx x)
11699 switch (GET_CODE (x))
11701 case CONST:
11702 x = XEXP (x, 0);
11704 if (GET_CODE (x) == PLUS)
11706 if (!CONST_INT_P (XEXP (x, 1)))
11707 return false;
11708 x = XEXP (x, 0);
11711 if (TARGET_MACHO && darwin_local_data_pic (x))
11712 return true;
11714 /* Only some unspecs are valid as "constants". */
11715 if (GET_CODE (x) == UNSPEC)
11716 switch (XINT (x, 1))
11718 case UNSPEC_GOT:
11719 case UNSPEC_GOTOFF:
11720 case UNSPEC_PLTOFF:
11721 return TARGET_64BIT;
11722 case UNSPEC_TPOFF:
11723 case UNSPEC_NTPOFF:
11724 x = XVECEXP (x, 0, 0);
11725 return (GET_CODE (x) == SYMBOL_REF
11726 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11727 case UNSPEC_DTPOFF:
11728 x = XVECEXP (x, 0, 0);
11729 return (GET_CODE (x) == SYMBOL_REF
11730 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11731 default:
11732 return false;
11735 /* We must have drilled down to a symbol. */
11736 if (GET_CODE (x) == LABEL_REF)
11737 return true;
11738 if (GET_CODE (x) != SYMBOL_REF)
11739 return false;
11740 /* FALLTHRU */
11742 case SYMBOL_REF:
11743 /* TLS symbols are never valid. */
11744 if (SYMBOL_REF_TLS_MODEL (x))
11745 return false;
11747 /* DLLIMPORT symbols are never valid. */
11748 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11749 && SYMBOL_REF_DLLIMPORT_P (x))
11750 return false;
11752 #if TARGET_MACHO
11753 /* mdynamic-no-pic */
11754 if (MACHO_DYNAMIC_NO_PIC_P)
11755 return machopic_symbol_defined_p (x);
11756 #endif
11757 break;
11759 case CONST_DOUBLE:
11760 if (GET_MODE (x) == TImode
11761 && x != CONST0_RTX (TImode)
11762 && !TARGET_64BIT)
11763 return false;
11764 break;
11766 case CONST_VECTOR:
11767 if (!standard_sse_constant_p (x))
11768 return false;
11770 default:
11771 break;
11774 /* Otherwise we handle everything else in the move patterns. */
11775 return true;
11778 /* Determine if it's legal to put X into the constant pool. This
11779 is not possible for the address of thread-local symbols, which
11780 is checked above. */
11782 static bool
11783 ix86_cannot_force_const_mem (rtx x)
11785 /* We can always put integral constants and vectors in memory. */
11786 switch (GET_CODE (x))
11788 case CONST_INT:
11789 case CONST_DOUBLE:
11790 case CONST_VECTOR:
11791 return false;
11793 default:
11794 break;
11796 return !legitimate_constant_p (x);
11800 /* Nonzero if the constant value X is a legitimate general operand
11801 when generating PIC code. It is given that flag_pic is on and
11802 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11804 bool
11805 legitimate_pic_operand_p (rtx x)
11807 rtx inner;
11809 switch (GET_CODE (x))
11811 case CONST:
11812 inner = XEXP (x, 0);
11813 if (GET_CODE (inner) == PLUS
11814 && CONST_INT_P (XEXP (inner, 1)))
11815 inner = XEXP (inner, 0);
11817 /* Only some unspecs are valid as "constants". */
11818 if (GET_CODE (inner) == UNSPEC)
11819 switch (XINT (inner, 1))
11821 case UNSPEC_GOT:
11822 case UNSPEC_GOTOFF:
11823 case UNSPEC_PLTOFF:
11824 return TARGET_64BIT;
11825 case UNSPEC_TPOFF:
11826 x = XVECEXP (inner, 0, 0);
11827 return (GET_CODE (x) == SYMBOL_REF
11828 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11829 case UNSPEC_MACHOPIC_OFFSET:
11830 return legitimate_pic_address_disp_p (x);
11831 default:
11832 return false;
11834 /* FALLTHRU */
11836 case SYMBOL_REF:
11837 case LABEL_REF:
11838 return legitimate_pic_address_disp_p (x);
11840 default:
11841 return true;
11845 /* Determine if a given CONST RTX is a valid memory displacement
11846 in PIC mode. */
11848 bool
11849 legitimate_pic_address_disp_p (rtx disp)
11851 bool saw_plus;
11853 /* In 64bit mode we can allow direct addresses of symbols and labels
11854 when they are not dynamic symbols. */
11855 if (TARGET_64BIT)
11857 rtx op0 = disp, op1;
11859 switch (GET_CODE (disp))
11861 case LABEL_REF:
11862 return true;
11864 case CONST:
11865 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11866 break;
11867 op0 = XEXP (XEXP (disp, 0), 0);
11868 op1 = XEXP (XEXP (disp, 0), 1);
11869 if (!CONST_INT_P (op1)
11870 || INTVAL (op1) >= 16*1024*1024
11871 || INTVAL (op1) < -16*1024*1024)
11872 break;
11873 if (GET_CODE (op0) == LABEL_REF)
11874 return true;
11875 if (GET_CODE (op0) != SYMBOL_REF)
11876 break;
11877 /* FALLTHRU */
11879 case SYMBOL_REF:
11880 /* TLS references should always be enclosed in UNSPEC. */
11881 if (SYMBOL_REF_TLS_MODEL (op0))
11882 return false;
11883 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11884 && ix86_cmodel != CM_LARGE_PIC)
11885 return true;
11886 break;
11888 default:
11889 break;
11892 if (GET_CODE (disp) != CONST)
11893 return false;
11894 disp = XEXP (disp, 0);
11896 if (TARGET_64BIT)
11898 /* We cannot safely allow PLUS expressions; these would exceed the allowed
11899 distance of GOT references. We should not need these anyway. */
11900 if (GET_CODE (disp) != UNSPEC
11901 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11902 && XINT (disp, 1) != UNSPEC_GOTOFF
11903 && XINT (disp, 1) != UNSPEC_PCREL
11904 && XINT (disp, 1) != UNSPEC_PLTOFF))
11905 return false;
11907 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11908 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11909 return false;
11910 return true;
11913 saw_plus = false;
11914 if (GET_CODE (disp) == PLUS)
11916 if (!CONST_INT_P (XEXP (disp, 1)))
11917 return false;
11918 disp = XEXP (disp, 0);
11919 saw_plus = true;
11922 if (TARGET_MACHO && darwin_local_data_pic (disp))
11923 return true;
11925 if (GET_CODE (disp) != UNSPEC)
11926 return false;
11928 switch (XINT (disp, 1))
11930 case UNSPEC_GOT:
11931 if (saw_plus)
11932 return false;
11933 /* We need to check for both symbols and labels because VxWorks loads
11934 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11935 details. */
11936 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11937 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11938 case UNSPEC_GOTOFF:
11939 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11940 While the ABI also specifies a 32bit relocation, we don't produce it in
11941 the small PIC model at all. */
11942 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11943 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11944 && !TARGET_64BIT)
11945 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11946 return false;
11947 case UNSPEC_GOTTPOFF:
11948 case UNSPEC_GOTNTPOFF:
11949 case UNSPEC_INDNTPOFF:
11950 if (saw_plus)
11951 return false;
11952 disp = XVECEXP (disp, 0, 0);
11953 return (GET_CODE (disp) == SYMBOL_REF
11954 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11955 case UNSPEC_NTPOFF:
11956 disp = XVECEXP (disp, 0, 0);
11957 return (GET_CODE (disp) == SYMBOL_REF
11958 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11959 case UNSPEC_DTPOFF:
11960 disp = XVECEXP (disp, 0, 0);
11961 return (GET_CODE (disp) == SYMBOL_REF
11962 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11965 return false;
11968 /* Recognizes RTL expressions that are valid memory addresses for an
11969 instruction. The MODE argument is the machine mode for the MEM
11970 expression that wants to use this address.
11972 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11973 convert common non-canonical forms to canonical form so that they will
11974 be recognized. */
11976 static bool
11977 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11978 rtx addr, bool strict)
11980 struct ix86_address parts;
11981 rtx base, index, disp;
11982 HOST_WIDE_INT scale;
11984 if (ix86_decompose_address (addr, &parts) <= 0)
11985 /* Decomposition failed. */
11986 return false;
11988 base = parts.base;
11989 index = parts.index;
11990 disp = parts.disp;
11991 scale = parts.scale;
11993 /* Validate base register.
11995 Don't allow SUBREG's that span more than a word here. It can lead to spill
11996 failures when the base is one word out of a two word structure, which is
11997 represented internally as a DImode int. */
11999 if (base)
12001 rtx reg;
12003 if (REG_P (base))
12004 reg = base;
12005 else if (GET_CODE (base) == SUBREG
12006 && REG_P (SUBREG_REG (base))
12007 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
12008 <= UNITS_PER_WORD)
12009 reg = SUBREG_REG (base);
12010 else
12011 /* Base is not a register. */
12012 return false;
12014 if (GET_MODE (base) != Pmode)
12015 /* Base is not in Pmode. */
12016 return false;
12018 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12019 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12020 /* Base is not valid. */
12021 return false;
12024 /* Validate index register.
12026 Don't allow SUBREG's that span more than a word here -- same as above. */
12028 if (index)
12030 rtx reg;
12032 if (REG_P (index))
12033 reg = index;
12034 else if (GET_CODE (index) == SUBREG
12035 && REG_P (SUBREG_REG (index))
12036 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
12037 <= UNITS_PER_WORD)
12038 reg = SUBREG_REG (index);
12039 else
12040 /* Index is not a register. */
12041 return false;
12043 if (GET_MODE (index) != Pmode)
12044 /* Index is not in Pmode. */
12045 return false;
12047 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12048 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12049 /* Index is not valid. */
12050 return false;
12053 /* Validate scale factor. */
12054 if (scale != 1)
12056 if (!index)
12057 /* Scale without index. */
12058 return false;
12060 if (scale != 2 && scale != 4 && scale != 8)
12061 /* Scale is not a valid multiplier. */
12062 return false;
12065 /* Validate displacement. */
12066 if (disp)
12068 if (GET_CODE (disp) == CONST
12069 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12070 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12071 switch (XINT (XEXP (disp, 0), 1))
12073 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12074 used. While the ABI also specifies 32bit relocations, we don't produce
12075 them at all and use IP-relative addressing instead. */
12076 case UNSPEC_GOT:
12077 case UNSPEC_GOTOFF:
12078 gcc_assert (flag_pic);
12079 if (!TARGET_64BIT)
12080 goto is_legitimate_pic;
12082 /* 64bit address unspec. */
12083 return false;
12085 case UNSPEC_GOTPCREL:
12086 case UNSPEC_PCREL:
12087 gcc_assert (flag_pic);
12088 goto is_legitimate_pic;
12090 case UNSPEC_GOTTPOFF:
12091 case UNSPEC_GOTNTPOFF:
12092 case UNSPEC_INDNTPOFF:
12093 case UNSPEC_NTPOFF:
12094 case UNSPEC_DTPOFF:
12095 break;
12097 case UNSPEC_STACK_CHECK:
12098 gcc_assert (flag_split_stack);
12099 break;
12101 default:
12102 /* Invalid address unspec. */
12103 return false;
12106 else if (SYMBOLIC_CONST (disp)
12107 && (flag_pic
12108 || (TARGET_MACHO
12109 #if TARGET_MACHO
12110 && MACHOPIC_INDIRECT
12111 && !machopic_operand_p (disp)
12112 #endif
12116 is_legitimate_pic:
12117 if (TARGET_64BIT && (index || base))
12119 /* foo@dtpoff(%rX) is ok. */
12120 if (GET_CODE (disp) != CONST
12121 || GET_CODE (XEXP (disp, 0)) != PLUS
12122 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12123 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12124 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12125 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12126 /* Non-constant pic memory reference. */
12127 return false;
12129 else if ((!TARGET_MACHO || flag_pic)
12130 && ! legitimate_pic_address_disp_p (disp))
12131 /* Displacement is an invalid pic construct. */
12132 return false;
12133 #if TARGET_MACHO
12134 else if (MACHO_DYNAMIC_NO_PIC_P && !legitimate_constant_p (disp))
12135 /* displacement must be referenced via non_lazy_pointer */
12136 return false;
12137 #endif
12139 /* This code used to verify that a symbolic pic displacement
12140 includes the pic_offset_table_rtx register.
12142 While this is a good idea, unfortunately these constructs may
12143 be created by "adds using lea" optimization for incorrect
12144 code like:
12146 int a;
12147 int foo(int i)
12149 return *(&a+i);
12152 This code is nonsensical, but results in addressing the
12153 GOT table with pic_offset_table_rtx as the base. We can't
12154 just refuse it easily, since it gets matched by the
12155 "addsi3" pattern, which later gets split to an lea when the
12156 output register differs from the input. While this
12157 could be handled by a separate addsi pattern for this case
12158 that never results in an lea, disabling this test seems to be
12159 the easier and correct fix for the crash. */
12161 else if (GET_CODE (disp) != LABEL_REF
12162 && !CONST_INT_P (disp)
12163 && (GET_CODE (disp) != CONST
12164 || !legitimate_constant_p (disp))
12165 && (GET_CODE (disp) != SYMBOL_REF
12166 || !legitimate_constant_p (disp)))
12167 /* Displacement is not constant. */
12168 return false;
12169 else if (TARGET_64BIT
12170 && !x86_64_immediate_operand (disp, VOIDmode))
12171 /* Displacement is out of range. */
12172 return false;
12175 /* Everything looks valid. */
12176 return true;
12179 /* Determine if a given RTX is a valid constant address. */
12181 bool
12182 constant_address_p (rtx x)
12184 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12187 /* Return a unique alias set for the GOT. */
12189 static alias_set_type
12190 ix86_GOT_alias_set (void)
12192 static alias_set_type set = -1;
12193 if (set == -1)
12194 set = new_alias_set ();
12195 return set;
12198 /* Return a legitimate reference for ORIG (an address) using the
12199 register REG. If REG is 0, a new pseudo is generated.
12201 There are two types of references that must be handled:
12203 1. Global data references must load the address from the GOT, via
12204 the PIC reg. An insn is emitted to do this load, and the reg is
12205 returned.
12207 2. Static data references, constant pool addresses, and code labels
12208 compute the address as an offset from the GOT, whose base is in
12209 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12210 differentiate them from global data objects. The returned
12211 address is the PIC reg + an unspec constant.
12213 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12214 reg also appears in the address. */
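/* Roughly, for 32-bit PIC the two cases above come out as
(mem (plus pic_reg (const (unspec [sym] UNSPEC_GOT)))) for global data
and (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF))) for local data,
as built in the body below; 64-bit code instead uses @GOTPCREL loads or
direct addresses. */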
12216 static rtx
12217 legitimize_pic_address (rtx orig, rtx reg)
12219 rtx addr = orig;
12220 rtx new_rtx = orig;
12221 rtx base;
12223 #if TARGET_MACHO
12224 if (TARGET_MACHO && !TARGET_64BIT)
12226 if (reg == 0)
12227 reg = gen_reg_rtx (Pmode);
12228 /* Use the generic Mach-O PIC machinery. */
12229 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12231 #endif
12233 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12234 new_rtx = addr;
12235 else if (TARGET_64BIT
12236 && ix86_cmodel != CM_SMALL_PIC
12237 && gotoff_operand (addr, Pmode))
12239 rtx tmpreg;
12240 /* This symbol may be referenced via a displacement from the PIC
12241 base address (@GOTOFF). */
12243 if (reload_in_progress)
12244 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12245 if (GET_CODE (addr) == CONST)
12246 addr = XEXP (addr, 0);
12247 if (GET_CODE (addr) == PLUS)
12249 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12250 UNSPEC_GOTOFF);
12251 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12253 else
12254 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12255 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12256 if (!reg)
12257 tmpreg = gen_reg_rtx (Pmode);
12258 else
12259 tmpreg = reg;
12260 emit_move_insn (tmpreg, new_rtx);
12262 if (reg != 0)
12264 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12265 tmpreg, 1, OPTAB_DIRECT);
12266 new_rtx = reg;
12268 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12270 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12272 /* This symbol may be referenced via a displacement from the PIC
12273 base address (@GOTOFF). */
12275 if (reload_in_progress)
12276 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12277 if (GET_CODE (addr) == CONST)
12278 addr = XEXP (addr, 0);
12279 if (GET_CODE (addr) == PLUS)
12281 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12282 UNSPEC_GOTOFF);
12283 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12285 else
12286 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12287 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12288 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12290 if (reg != 0)
12292 emit_move_insn (reg, new_rtx);
12293 new_rtx = reg;
12296 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12297 /* We can't use @GOTOFF for text labels on VxWorks;
12298 see gotoff_operand. */
12299 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12301 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12303 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12304 return legitimize_dllimport_symbol (addr, true);
12305 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12306 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12307 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12309 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12310 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12314 /* For x64 PE-COFF there is no GOT table, so we use the address
12315 directly. */
12316 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12318 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12319 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12321 if (reg == 0)
12322 reg = gen_reg_rtx (Pmode);
12323 emit_move_insn (reg, new_rtx);
12324 new_rtx = reg;
12326 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12328 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12329 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12330 new_rtx = gen_const_mem (Pmode, new_rtx);
12331 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12333 if (reg == 0)
12334 reg = gen_reg_rtx (Pmode);
12335 /* Use gen_movsi directly, otherwise the address is loaded
12336 into a register for CSE. We don't want to CSE these addresses;
12337 instead we CSE addresses from the GOT table, so skip this. */
12338 emit_insn (gen_movsi (reg, new_rtx));
12339 new_rtx = reg;
12341 else
12343 /* This symbol must be referenced via a load from the
12344 Global Offset Table (@GOT). */
12346 if (reload_in_progress)
12347 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12348 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12349 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12350 if (TARGET_64BIT)
12351 new_rtx = force_reg (Pmode, new_rtx);
12352 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12353 new_rtx = gen_const_mem (Pmode, new_rtx);
12354 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12356 if (reg == 0)
12357 reg = gen_reg_rtx (Pmode);
12358 emit_move_insn (reg, new_rtx);
12359 new_rtx = reg;
12362 else
12364 if (CONST_INT_P (addr)
12365 && !x86_64_immediate_operand (addr, VOIDmode))
12367 if (reg)
12369 emit_move_insn (reg, addr);
12370 new_rtx = reg;
12372 else
12373 new_rtx = force_reg (Pmode, addr);
12375 else if (GET_CODE (addr) == CONST)
12377 addr = XEXP (addr, 0);
12379 /* We must match stuff we generate before. Assume the only
12380 unspecs that can get here are ours. Not that we could do
12381 anything with them anyway.... */
12382 if (GET_CODE (addr) == UNSPEC
12383 || (GET_CODE (addr) == PLUS
12384 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12385 return orig;
12386 gcc_assert (GET_CODE (addr) == PLUS);
12388 if (GET_CODE (addr) == PLUS)
12390 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12392 /* Check first to see if this is a constant offset from a @GOTOFF
12393 symbol reference. */
12394 if (gotoff_operand (op0, Pmode)
12395 && CONST_INT_P (op1))
12397 if (!TARGET_64BIT)
12399 if (reload_in_progress)
12400 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12401 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12402 UNSPEC_GOTOFF);
12403 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12404 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12405 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12407 if (reg != 0)
12409 emit_move_insn (reg, new_rtx);
12410 new_rtx = reg;
12413 else
12415 if (INTVAL (op1) < -16*1024*1024
12416 || INTVAL (op1) >= 16*1024*1024)
12418 if (!x86_64_immediate_operand (op1, Pmode))
12419 op1 = force_reg (Pmode, op1);
12420 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12424 else
12426 base = legitimize_pic_address (XEXP (addr, 0), reg);
12427 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12428 base == reg ? NULL_RTX : reg);
12430 if (CONST_INT_P (new_rtx))
12431 new_rtx = plus_constant (base, INTVAL (new_rtx));
12432 else
12434 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12436 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12437 new_rtx = XEXP (new_rtx, 1);
12439 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12444 return new_rtx;
12447 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12449 static rtx
12450 get_thread_pointer (int to_reg)
12452 rtx tp, reg, insn;
12454 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12455 if (!to_reg)
12456 return tp;
12458 reg = gen_reg_rtx (Pmode);
12459 insn = gen_rtx_SET (VOIDmode, reg, tp);
12460 insn = emit_insn (insn);
12462 return reg;
12465 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12466 false if we expect this to be used for a memory address and true if
12467 we expect to load the address into a register. */
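/* In outline, the four models handled below are: GLOBAL_DYNAMIC emits the
tls_global_dynamic patterns (a __tls_get_addr-style call), LOCAL_DYNAMIC
computes a module base and adds @DTPOFF, INITIAL_EXEC loads the offset
from the GOT (@GOTTPOFF and friends) and combines it with the thread
pointer, and LOCAL_EXEC combines a link-time constant (@NTPOFF/@TPOFF)
with the thread pointer. */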
12469 static rtx
12470 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
12472 rtx dest, base, off, pic, tp;
12473 int type;
12475 switch (model)
12477 case TLS_MODEL_GLOBAL_DYNAMIC:
12478 dest = gen_reg_rtx (Pmode);
12479 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12481 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12483 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12485 start_sequence ();
12486 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
12487 insns = get_insns ();
12488 end_sequence ();
12490 RTL_CONST_CALL_P (insns) = 1;
12491 emit_libcall_block (insns, dest, rax, x);
12493 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12494 emit_insn (gen_tls_global_dynamic_64 (dest, x));
12495 else
12496 emit_insn (gen_tls_global_dynamic_32 (dest, x));
12498 if (TARGET_GNU2_TLS)
12500 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12502 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12504 break;
12506 case TLS_MODEL_LOCAL_DYNAMIC:
12507 base = gen_reg_rtx (Pmode);
12508 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12510 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12512 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
12514 start_sequence ();
12515 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
12516 insns = get_insns ();
12517 end_sequence ();
12519 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
12520 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
12521 RTL_CONST_CALL_P (insns) = 1;
12522 emit_libcall_block (insns, base, rax, note);
12524 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12525 emit_insn (gen_tls_local_dynamic_base_64 (base));
12526 else
12527 emit_insn (gen_tls_local_dynamic_base_32 (base));
12529 if (TARGET_GNU2_TLS)
12531 rtx x = ix86_tls_module_base ();
12533 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12534 gen_rtx_MINUS (Pmode, x, tp));
12537 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12538 off = gen_rtx_CONST (Pmode, off);
12540 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12542 if (TARGET_GNU2_TLS)
12544 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12546 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12549 break;
12551 case TLS_MODEL_INITIAL_EXEC:
12552 if (TARGET_64BIT)
12554 pic = NULL;
12555 type = UNSPEC_GOTNTPOFF;
12557 else if (flag_pic)
12559 if (reload_in_progress)
12560 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12561 pic = pic_offset_table_rtx;
12562 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12564 else if (!TARGET_ANY_GNU_TLS)
12566 pic = gen_reg_rtx (Pmode);
12567 emit_insn (gen_set_got (pic));
12568 type = UNSPEC_GOTTPOFF;
12570 else
12572 pic = NULL;
12573 type = UNSPEC_INDNTPOFF;
12576 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12577 off = gen_rtx_CONST (Pmode, off);
12578 if (pic)
12579 off = gen_rtx_PLUS (Pmode, pic, off);
12580 off = gen_const_mem (Pmode, off);
12581 set_mem_alias_set (off, ix86_GOT_alias_set ());
12583 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12585 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12586 off = force_reg (Pmode, off);
12587 return gen_rtx_PLUS (Pmode, base, off);
12589 else
12591 base = get_thread_pointer (true);
12592 dest = gen_reg_rtx (Pmode);
12593 emit_insn (gen_subsi3 (dest, base, off));
12595 break;
12597 case TLS_MODEL_LOCAL_EXEC:
12598 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12599 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12600 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12601 off = gen_rtx_CONST (Pmode, off);
12603 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12605 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12606 return gen_rtx_PLUS (Pmode, base, off);
12608 else
12610 base = get_thread_pointer (true);
12611 dest = gen_reg_rtx (Pmode);
12612 emit_insn (gen_subsi3 (dest, base, off));
12614 break;
12616 default:
12617 gcc_unreachable ();
12620 return dest;
12623 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12624 to symbol DECL. */
12626 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12627 htab_t dllimport_map;
12629 static tree
12630 get_dllimport_decl (tree decl)
12632 struct tree_map *h, in;
12633 void **loc;
12634 const char *name;
12635 const char *prefix;
12636 size_t namelen, prefixlen;
12637 char *imp_name;
12638 tree to;
12639 rtx rtl;
12641 if (!dllimport_map)
12642 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12644 in.hash = htab_hash_pointer (decl);
12645 in.base.from = decl;
12646 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12647 h = (struct tree_map *) *loc;
12648 if (h)
12649 return h->to;
12651 *loc = h = ggc_alloc_tree_map ();
12652 h->hash = in.hash;
12653 h->base.from = decl;
12654 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12655 VAR_DECL, NULL, ptr_type_node);
12656 DECL_ARTIFICIAL (to) = 1;
12657 DECL_IGNORED_P (to) = 1;
12658 DECL_EXTERNAL (to) = 1;
12659 TREE_READONLY (to) = 1;
12661 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12662 name = targetm.strip_name_encoding (name);
12663 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12664 ? "*__imp_" : "*__imp__";
12665 namelen = strlen (name);
12666 prefixlen = strlen (prefix);
12667 imp_name = (char *) alloca (namelen + prefixlen + 1);
12668 memcpy (imp_name, prefix, prefixlen);
12669 memcpy (imp_name + prefixlen, name, namelen + 1);
12671 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12672 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12673 SET_SYMBOL_REF_DECL (rtl, to);
12674 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12676 rtl = gen_const_mem (Pmode, rtl);
12677 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12679 SET_DECL_RTL (to, rtl);
12680 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12682 return to;
12685 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12686 true if we require the result be a register. */
12688 static rtx
12689 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12691 tree imp_decl;
12692 rtx x;
12694 gcc_assert (SYMBOL_REF_DECL (symbol));
12695 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12697 x = DECL_RTL (imp_decl);
12698 if (want_reg)
12699 x = force_reg (Pmode, x);
12700 return x;
12703 /* Try machine-dependent ways of modifying an illegitimate address
12704 to be legitimate. If we find one, return the new, valid address.
12705 This macro is used in only one place: `memory_address' in explow.c.
12707 OLDX is the address as it was before break_out_memory_refs was called.
12708 In some cases it is useful to look at this to decide what needs to be done.
12710 It is always safe for this macro to do nothing. It exists to recognize
12711 opportunities to optimize the output.
12713 For the 80386, we handle X+REG by loading X into a register R and
12714 using R+REG. R will go in a general reg and indexing will be used.
12715 However, if REG is a broken-out memory address or multiplication,
12716 nothing needs to be done because REG can certainly go in a general reg.
12718 When -fpic is used, special handling is needed for symbolic references.
12719 See comments by legitimize_pic_address in i386.c for details. */
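/* One example of the canonicalizations performed below: an address such as
(plus (ashift (reg) (const_int 2)) (reg)) is rewritten to
(plus (mult (reg) (const_int 4)) (reg)) so that it matches the
base + index*scale form accepted by ix86_legitimate_address_p. */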
12721 static rtx
12722 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12723 enum machine_mode mode)
12725 int changed = 0;
12726 unsigned log;
12728 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12729 if (log)
12730 return legitimize_tls_address (x, (enum tls_model) log, false);
12731 if (GET_CODE (x) == CONST
12732 && GET_CODE (XEXP (x, 0)) == PLUS
12733 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12734 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12736 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12737 (enum tls_model) log, false);
12738 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12741 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12743 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12744 return legitimize_dllimport_symbol (x, true);
12745 if (GET_CODE (x) == CONST
12746 && GET_CODE (XEXP (x, 0)) == PLUS
12747 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12748 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12750 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12751 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12755 if (flag_pic && SYMBOLIC_CONST (x))
12756 return legitimize_pic_address (x, 0);
12758 #if TARGET_MACHO
12759 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12760 return machopic_indirect_data_reference (x, 0);
12761 #endif
12763 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12764 if (GET_CODE (x) == ASHIFT
12765 && CONST_INT_P (XEXP (x, 1))
12766 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12768 changed = 1;
12769 log = INTVAL (XEXP (x, 1));
12770 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12771 GEN_INT (1 << log));
12774 if (GET_CODE (x) == PLUS)
12776 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12778 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12779 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12780 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12782 changed = 1;
12783 log = INTVAL (XEXP (XEXP (x, 0), 1));
12784 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12785 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12786 GEN_INT (1 << log));
12789 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12790 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12791 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12793 changed = 1;
12794 log = INTVAL (XEXP (XEXP (x, 1), 1));
12795 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12796 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12797 GEN_INT (1 << log));
12800 /* Put multiply first if it isn't already. */
12801 if (GET_CODE (XEXP (x, 1)) == MULT)
12803 rtx tmp = XEXP (x, 0);
12804 XEXP (x, 0) = XEXP (x, 1);
12805 XEXP (x, 1) = tmp;
12806 changed = 1;
12809 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12810 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12811 created by virtual register instantiation, register elimination, and
12812 similar optimizations. */
12813 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12815 changed = 1;
12816 x = gen_rtx_PLUS (Pmode,
12817 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12818 XEXP (XEXP (x, 1), 0)),
12819 XEXP (XEXP (x, 1), 1));
12822 /* Canonicalize
12823 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12824 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12825 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12826 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12827 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12828 && CONSTANT_P (XEXP (x, 1)))
12830 rtx constant;
12831 rtx other = NULL_RTX;
12833 if (CONST_INT_P (XEXP (x, 1)))
12835 constant = XEXP (x, 1);
12836 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12838 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12840 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12841 other = XEXP (x, 1);
12843 else
12844 constant = 0;
12846 if (constant)
12848 changed = 1;
12849 x = gen_rtx_PLUS (Pmode,
12850 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12851 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12852 plus_constant (other, INTVAL (constant)));
12856 if (changed && ix86_legitimate_address_p (mode, x, false))
12857 return x;
12859 if (GET_CODE (XEXP (x, 0)) == MULT)
12861 changed = 1;
12862 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12865 if (GET_CODE (XEXP (x, 1)) == MULT)
12867 changed = 1;
12868 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12871 if (changed
12872 && REG_P (XEXP (x, 1))
12873 && REG_P (XEXP (x, 0)))
12874 return x;
12876 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12878 changed = 1;
12879 x = legitimize_pic_address (x, 0);
12882 if (changed && ix86_legitimate_address_p (mode, x, false))
12883 return x;
12885 if (REG_P (XEXP (x, 0)))
12887 rtx temp = gen_reg_rtx (Pmode);
12888 rtx val = force_operand (XEXP (x, 1), temp);
12889 if (val != temp)
12890 emit_move_insn (temp, val);
12892 XEXP (x, 1) = temp;
12893 return x;
12896 else if (REG_P (XEXP (x, 1)))
12898 rtx temp = gen_reg_rtx (Pmode);
12899 rtx val = force_operand (XEXP (x, 0), temp);
12900 if (val != temp)
12901 emit_move_insn (temp, val);
12903 XEXP (x, 0) = temp;
12904 return x;
12908 return x;
12911 /* Print an integer constant expression in assembler syntax. Addition
12912 and subtraction are the only arithmetic that may appear in these
12913 expressions. FILE is the stdio stream to write to, X is the rtx, and
12914 CODE is the operand print code from the output string. */
12916 static void
12917 output_pic_addr_const (FILE *file, rtx x, int code)
12919 char buf[256];
12921 switch (GET_CODE (x))
12923 case PC:
12924 gcc_assert (flag_pic);
12925 putc ('.', file);
12926 break;
12928 case SYMBOL_REF:
12929 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12930 output_addr_const (file, x);
12931 else
12933 const char *name = XSTR (x, 0);
12935 /* Mark the decl as referenced so that cgraph will
12936 output the function. */
12937 if (SYMBOL_REF_DECL (x))
12938 mark_decl_referenced (SYMBOL_REF_DECL (x));
12940 #if TARGET_MACHO
12941 if (MACHOPIC_INDIRECT
12942 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12943 name = machopic_indirection_name (x, /*stub_p=*/true);
12944 #endif
12945 assemble_name (file, name);
12947 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12948 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12949 fputs ("@PLT", file);
12950 break;
12952 case LABEL_REF:
12953 x = XEXP (x, 0);
12954 /* FALLTHRU */
12955 case CODE_LABEL:
12956 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12957 assemble_name (asm_out_file, buf);
12958 break;
12960 case CONST_INT:
12961 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12962 break;
12964 case CONST:
12965 /* This used to output parentheses around the expression,
12966 but that does not work on the 386 (either ATT or BSD assembler). */
12967 output_pic_addr_const (file, XEXP (x, 0), code);
12968 break;
12970 case CONST_DOUBLE:
12971 if (GET_MODE (x) == VOIDmode)
12973 /* We can use %d if the number is <32 bits and positive. */
12974 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12975 fprintf (file, "0x%lx%08lx",
12976 (unsigned long) CONST_DOUBLE_HIGH (x),
12977 (unsigned long) CONST_DOUBLE_LOW (x));
12978 else
12979 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12981 else
12982 /* We can't handle floating point constants;
12983 TARGET_PRINT_OPERAND must handle them. */
12984 output_operand_lossage ("floating constant misused");
12985 break;
12987 case PLUS:
12988 /* Some assemblers need integer constants to appear first. */
12989 if (CONST_INT_P (XEXP (x, 0)))
12991 output_pic_addr_const (file, XEXP (x, 0), code);
12992 putc ('+', file);
12993 output_pic_addr_const (file, XEXP (x, 1), code);
12995 else
12997 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12998 output_pic_addr_const (file, XEXP (x, 1), code);
12999 putc ('+', file);
13000 output_pic_addr_const (file, XEXP (x, 0), code);
13002 break;
13004 case MINUS:
13005 if (!TARGET_MACHO)
13006 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13007 output_pic_addr_const (file, XEXP (x, 0), code);
13008 putc ('-', file);
13009 output_pic_addr_const (file, XEXP (x, 1), code);
13010 if (!TARGET_MACHO)
13011 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13012 break;
13014 case UNSPEC:
13015 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13017 bool f = i386_asm_output_addr_const_extra (file, x);
13018 gcc_assert (f);
13019 break;
13022 gcc_assert (XVECLEN (x, 0) == 1);
13023 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13024 switch (XINT (x, 1))
13026 case UNSPEC_GOT:
13027 fputs ("@GOT", file);
13028 break;
13029 case UNSPEC_GOTOFF:
13030 fputs ("@GOTOFF", file);
13031 break;
13032 case UNSPEC_PLTOFF:
13033 fputs ("@PLTOFF", file);
13034 break;
13035 case UNSPEC_PCREL:
13036 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13037 "(%rip)" : "[rip]", file);
13038 break;
13039 case UNSPEC_GOTPCREL:
13040 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13041 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13042 break;
13043 case UNSPEC_GOTTPOFF:
13044 /* FIXME: This might be @TPOFF in Sun ld too. */
13045 fputs ("@gottpoff", file);
13046 break;
13047 case UNSPEC_TPOFF:
13048 fputs ("@tpoff", file);
13049 break;
13050 case UNSPEC_NTPOFF:
13051 if (TARGET_64BIT)
13052 fputs ("@tpoff", file);
13053 else
13054 fputs ("@ntpoff", file);
13055 break;
13056 case UNSPEC_DTPOFF:
13057 fputs ("@dtpoff", file);
13058 break;
13059 case UNSPEC_GOTNTPOFF:
13060 if (TARGET_64BIT)
13061 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13062 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13063 else
13064 fputs ("@gotntpoff", file);
13065 break;
13066 case UNSPEC_INDNTPOFF:
13067 fputs ("@indntpoff", file);
13068 break;
13069 #if TARGET_MACHO
13070 case UNSPEC_MACHOPIC_OFFSET:
13071 putc ('-', file);
13072 machopic_output_function_base_name (file);
13073 break;
13074 #endif
13075 default:
13076 output_operand_lossage ("invalid UNSPEC as operand");
13077 break;
13079 break;
13081 default:
13082 output_operand_lossage ("invalid expression as operand");
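
/* For illustration only: a minimal standalone sketch (not part of the
   compiler) of how the UNSPEC cases above translate into assembler
   relocation suffixes.  The enum and function names here are hypothetical;
   only the suffix strings are taken from the switch above.  */

enum pic_unspec_kind { PIC_GOT, PIC_GOTOFF, PIC_PLTOFF, PIC_TPOFF, PIC_DTPOFF };

static const char *
pic_unspec_suffix (enum pic_unspec_kind kind)
{
  switch (kind)
    {
    case PIC_GOT:    return "@GOT";
    case PIC_GOTOFF: return "@GOTOFF";
    case PIC_PLTOFF: return "@PLTOFF";
    case PIC_TPOFF:  return "@tpoff";
    case PIC_DTPOFF: return "@dtpoff";
    }
  return "";
}

/* So a reference to symbol "foo" wrapped in UNSPEC_GOTOFF is printed as
   "foo@GOTOFF", matching the UNSPEC_GOTOFF arm of the switch above.  */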
13086 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13087 We need to emit DTP-relative relocations. */
13089 static void ATTRIBUTE_UNUSED
13090 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13092 fputs (ASM_LONG, file);
13093 output_addr_const (file, x);
13094 fputs ("@dtpoff", file);
13095 switch (size)
13097 case 4:
13098 break;
13099 case 8:
13100 fputs (", 0", file);
13101 break;
13102 default:
13103 gcc_unreachable ();
13107 /* Return true if X is a representation of the PIC register. This copes
13108 with calls from ix86_find_base_term, where the register might have
13109 been replaced by a cselib value. */
13111 static bool
13112 ix86_pic_register_p (rtx x)
13114 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13115 return (pic_offset_table_rtx
13116 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13117 else
13118 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13121 /* Helper function for ix86_delegitimize_address.
13122 Attempt to delegitimize TLS local-exec accesses. */
13124 static rtx
13125 ix86_delegitimize_tls_address (rtx orig_x)
13127 rtx x = orig_x, unspec;
13128 struct ix86_address addr;
13130 if (!TARGET_TLS_DIRECT_SEG_REFS)
13131 return orig_x;
13132 if (MEM_P (x))
13133 x = XEXP (x, 0);
13134 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13135 return orig_x;
13136 if (ix86_decompose_address (x, &addr) == 0
13137 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13138 || addr.disp == NULL_RTX
13139 || GET_CODE (addr.disp) != CONST)
13140 return orig_x;
13141 unspec = XEXP (addr.disp, 0);
13142 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13143 unspec = XEXP (unspec, 0);
13144 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13145 return orig_x;
13146 x = XVECEXP (unspec, 0, 0);
13147 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13148 if (unspec != XEXP (addr.disp, 0))
13149 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13150 if (addr.index)
13152 rtx idx = addr.index;
13153 if (addr.scale != 1)
13154 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13155 x = gen_rtx_PLUS (Pmode, idx, x);
13157 if (addr.base)
13158 x = gen_rtx_PLUS (Pmode, addr.base, x);
13159 if (MEM_P (orig_x))
13160 x = replace_equiv_address_nv (orig_x, x);
13161 return x;
13164 /* In the name of slightly smaller debug output, and to cater to
13165 general assembler lossage, recognize PIC+GOTOFF and turn it back
13166 into a direct symbol reference.
13168 On Darwin, this is necessary to avoid a crash, because Darwin
13169 has a different PIC label for each routine but the DWARF debugging
13170 information is not associated with any particular routine, so it's
13171 necessary to remove references to the PIC label from RTL stored by
13172 the DWARF output code. */
13174 static rtx
13175 ix86_delegitimize_address (rtx x)
13177 rtx orig_x = delegitimize_mem_from_attrs (x);
13178 /* addend is NULL or some rtx if x is something+GOTOFF where
13179 something doesn't include the PIC register. */
13180 rtx addend = NULL_RTX;
13181 /* reg_addend is NULL or a multiple of some register. */
13182 rtx reg_addend = NULL_RTX;
13183 /* const_addend is NULL or a const_int. */
13184 rtx const_addend = NULL_RTX;
13185 /* This is the result, or NULL. */
13186 rtx result = NULL_RTX;
13188 x = orig_x;
13190 if (MEM_P (x))
13191 x = XEXP (x, 0);
13193 if (TARGET_64BIT)
13195 if (GET_CODE (x) != CONST
13196 || GET_CODE (XEXP (x, 0)) != UNSPEC
13197 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13198 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13199 || !MEM_P (orig_x))
13200 return ix86_delegitimize_tls_address (orig_x);
13201 x = XVECEXP (XEXP (x, 0), 0, 0);
13202 if (GET_MODE (orig_x) != Pmode)
13203 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
13204 return x;
13207 if (GET_CODE (x) != PLUS
13208 || GET_CODE (XEXP (x, 1)) != CONST)
13209 return ix86_delegitimize_tls_address (orig_x);
13211 if (ix86_pic_register_p (XEXP (x, 0)))
13212 /* %ebx + GOT/GOTOFF */
13214 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13216 /* %ebx + %reg * scale + GOT/GOTOFF */
13217 reg_addend = XEXP (x, 0);
13218 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13219 reg_addend = XEXP (reg_addend, 1);
13220 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13221 reg_addend = XEXP (reg_addend, 0);
13222 else
13224 reg_addend = NULL_RTX;
13225 addend = XEXP (x, 0);
13228 else
13229 addend = XEXP (x, 0);
13231 x = XEXP (XEXP (x, 1), 0);
13232 if (GET_CODE (x) == PLUS
13233 && CONST_INT_P (XEXP (x, 1)))
13235 const_addend = XEXP (x, 1);
13236 x = XEXP (x, 0);
13239 if (GET_CODE (x) == UNSPEC
13240 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13241 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13242 result = XVECEXP (x, 0, 0);
13244 if (TARGET_MACHO && darwin_local_data_pic (x)
13245 && !MEM_P (orig_x))
13246 result = XVECEXP (x, 0, 0);
13248 if (! result)
13249 return ix86_delegitimize_tls_address (orig_x);
13251 if (const_addend)
13252 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13253 if (reg_addend)
13254 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13255 if (addend)
13257 /* If the rest of original X doesn't involve the PIC register, add
13258 addend and subtract pic_offset_table_rtx. This can happen e.g.
13259 for code like:
13260 leal (%ebx, %ecx, 4), %ecx
13262 movl foo@GOTOFF(%ecx), %edx
13263 in which case we return (%ecx - %ebx) + foo. */
13264 if (pic_offset_table_rtx)
13265 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13266 pic_offset_table_rtx),
13267 result);
13268 else
13269 return orig_x;
13271 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13272 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13273 return result;
13276 /* If X is a machine specific address (i.e. a symbol or label being
13277 referenced as a displacement from the GOT implemented using an
13278 UNSPEC), then return the base term. Otherwise return X. */
13281 ix86_find_base_term (rtx x)
13283 rtx term;
13285 if (TARGET_64BIT)
13287 if (GET_CODE (x) != CONST)
13288 return x;
13289 term = XEXP (x, 0);
13290 if (GET_CODE (term) == PLUS
13291 && (CONST_INT_P (XEXP (term, 1))
13292 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13293 term = XEXP (term, 0);
13294 if (GET_CODE (term) != UNSPEC
13295 || (XINT (term, 1) != UNSPEC_GOTPCREL
13296 && XINT (term, 1) != UNSPEC_PCREL))
13297 return x;
13299 return XVECEXP (term, 0, 0);
13302 return ix86_delegitimize_address (x);
13305 static void
13306 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13307 int fp, FILE *file)
13309 const char *suffix;
13311 if (mode == CCFPmode || mode == CCFPUmode)
13313 code = ix86_fp_compare_code_to_integer (code);
13314 mode = CCmode;
13316 if (reverse)
13317 code = reverse_condition (code);
13319 switch (code)
13321 case EQ:
13322 switch (mode)
13324 case CCAmode:
13325 suffix = "a";
13326 break;
13328 case CCCmode:
13329 suffix = "c";
13330 break;
13332 case CCOmode:
13333 suffix = "o";
13334 break;
13336 case CCSmode:
13337 suffix = "s";
13338 break;
13340 default:
13341 suffix = "e";
13343 break;
13344 case NE:
13345 switch (mode)
13347 case CCAmode:
13348 suffix = "na";
13349 break;
13351 case CCCmode:
13352 suffix = "nc";
13353 break;
13355 case CCOmode:
13356 suffix = "no";
13357 break;
13359 case CCSmode:
13360 suffix = "ns";
13361 break;
13363 default:
13364 suffix = "ne";
13366 break;
13367 case GT:
13368 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13369 suffix = "g";
13370 break;
13371 case GTU:
13372 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13373 Those same assemblers have the same but opposite lossage on cmov. */
13374 if (mode == CCmode)
13375 suffix = fp ? "nbe" : "a";
13376 else if (mode == CCCmode)
13377 suffix = "b";
13378 else
13379 gcc_unreachable ();
13380 break;
13381 case LT:
13382 switch (mode)
13384 case CCNOmode:
13385 case CCGOCmode:
13386 suffix = "s";
13387 break;
13389 case CCmode:
13390 case CCGCmode:
13391 suffix = "l";
13392 break;
13394 default:
13395 gcc_unreachable ();
13397 break;
13398 case LTU:
13399 gcc_assert (mode == CCmode || mode == CCCmode);
13400 suffix = "b";
13401 break;
13402 case GE:
13403 switch (mode)
13405 case CCNOmode:
13406 case CCGOCmode:
13407 suffix = "ns";
13408 break;
13410 case CCmode:
13411 case CCGCmode:
13412 suffix = "ge";
13413 break;
13415 default:
13416 gcc_unreachable ();
13418 break;
13419 case GEU:
13420 /* ??? As above. */
13421 gcc_assert (mode == CCmode || mode == CCCmode);
13422 suffix = fp ? "nb" : "ae";
13423 break;
13424 case LE:
13425 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13426 suffix = "le";
13427 break;
13428 case LEU:
13429 /* ??? As above. */
13430 if (mode == CCmode)
13431 suffix = "be";
13432 else if (mode == CCCmode)
13433 suffix = fp ? "nb" : "ae";
13434 else
13435 gcc_unreachable ();
13436 break;
13437 case UNORDERED:
13438 suffix = fp ? "u" : "p";
13439 break;
13440 case ORDERED:
13441 suffix = fp ? "nu" : "np";
13442 break;
13443 default:
13444 gcc_unreachable ();
13446 fputs (suffix, file);
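
/* Illustrative standalone sketch (not compiler code): the integer
   condition suffixes chosen above for the plain CCmode case, signed
   vs. unsigned.  The enum and function names are hypothetical; the
   suffix strings are the ones emitted by put_condition_code.  */

enum cmp_kind { CMP_EQ, CMP_NE, CMP_LT, CMP_LE, CMP_GT, CMP_GE };

static const char *
int_cc_suffix (enum cmp_kind code, int is_unsigned)
{
  switch (code)
    {
    case CMP_EQ: return "e";
    case CMP_NE: return "ne";
    case CMP_LT: return is_unsigned ? "b"  : "l";
    case CMP_LE: return is_unsigned ? "be" : "le";
    case CMP_GT: return is_unsigned ? "a"  : "g";
    case CMP_GE: return is_unsigned ? "ae" : "ge";
    }
  return "";
}

/* e.g. a signed "greater than" yields the suffix "g" (setg/cmovg), while
   the unsigned variant yields "a" ("above"), as in the GTU case above.  */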
13449 /* Print the name of register X to FILE based on its machine mode and number.
13450 If CODE is 'w', pretend the mode is HImode.
13451 If CODE is 'b', pretend the mode is QImode.
13452 If CODE is 'k', pretend the mode is SImode.
13453 If CODE is 'q', pretend the mode is DImode.
13454 If CODE is 'x', pretend the mode is V4SFmode.
13455 If CODE is 't', pretend the mode is V8SFmode.
13456 If CODE is 'h', pretend the reg is the 'high' byte register.
13457 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13458 If CODE is 'd', duplicate the operand for AVX instruction.
13461 void
13462 print_reg (rtx x, int code, FILE *file)
13464 const char *reg;
13465 bool duplicated = code == 'd' && TARGET_AVX;
13467 gcc_assert (x == pc_rtx
13468 || (REGNO (x) != ARG_POINTER_REGNUM
13469 && REGNO (x) != FRAME_POINTER_REGNUM
13470 && REGNO (x) != FLAGS_REG
13471 && REGNO (x) != FPSR_REG
13472 && REGNO (x) != FPCR_REG));
13474 if (ASSEMBLER_DIALECT == ASM_ATT)
13475 putc ('%', file);
13477 if (x == pc_rtx)
13479 gcc_assert (TARGET_64BIT);
13480 fputs ("rip", file);
13481 return;
13484 if (code == 'w' || MMX_REG_P (x))
13485 code = 2;
13486 else if (code == 'b')
13487 code = 1;
13488 else if (code == 'k')
13489 code = 4;
13490 else if (code == 'q')
13491 code = 8;
13492 else if (code == 'y')
13493 code = 3;
13494 else if (code == 'h')
13495 code = 0;
13496 else if (code == 'x')
13497 code = 16;
13498 else if (code == 't')
13499 code = 32;
13500 else
13501 code = GET_MODE_SIZE (GET_MODE (x));
13503 /* Irritatingly, AMD extended registers use a different naming convention
13504 from the normal registers. */
13505 if (REX_INT_REG_P (x))
13507 gcc_assert (TARGET_64BIT);
13508 switch (code)
13510 case 0:
13511 error ("extended registers have no high halves");
13512 break;
13513 case 1:
13514 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13515 break;
13516 case 2:
13517 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13518 break;
13519 case 4:
13520 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13521 break;
13522 case 8:
13523 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13524 break;
13525 default:
13526 error ("unsupported operand size for extended register");
13527 break;
13529 return;
13532 reg = NULL;
13533 switch (code)
13535 case 3:
13536 if (STACK_TOP_P (x))
13538 reg = "st(0)";
13539 break;
13541 /* FALLTHRU */
13542 case 8:
13543 case 4:
13544 case 12:
13545 if (! ANY_FP_REG_P (x))
13546 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13547 /* FALLTHRU */
13548 case 16:
13549 case 2:
13550 normal:
13551 reg = hi_reg_name[REGNO (x)];
13552 break;
13553 case 1:
13554 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13555 goto normal;
13556 reg = qi_reg_name[REGNO (x)];
13557 break;
13558 case 0:
13559 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13560 goto normal;
13561 reg = qi_high_reg_name[REGNO (x)];
13562 break;
13563 case 32:
13564 if (SSE_REG_P (x))
13566 gcc_assert (!duplicated);
13567 putc ('y', file);
13568 fputs (hi_reg_name[REGNO (x)] + 1, file);
13569 return;
13571 break;
13572 default:
13573 gcc_unreachable ();
13576 fputs (reg, file);
13577 if (duplicated)
13579 if (ASSEMBLER_DIALECT == ASM_ATT)
13580 fprintf (file, ", %%%s", reg);
13581 else
13582 fprintf (file, ", %s", reg);
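
/* Illustrative standalone sketch (not compiler code) of the AMD extended
   register naming handled above: register numbers 8..15 are printed as
   "r<N>" with a "b", "w" or "d" suffix depending on the access size.
   The function name is hypothetical; the formats mirror the fprintf
   calls in the REX_INT_REG_P branch above.  */
#include <stdio.h>

static void
print_rex_reg_name (char *buf, size_t len, int regno, int size_in_bytes)
{
  /* regno is the hardware number 8..15 of an extended register.  */
  switch (size_in_bytes)
    {
    case 1:  snprintf (buf, len, "r%ib", regno); break;  /* e.g. r8b */
    case 2:  snprintf (buf, len, "r%iw", regno); break;  /* e.g. r8w */
    case 4:  snprintf (buf, len, "r%id", regno); break;  /* e.g. r8d */
    case 8:  snprintf (buf, len, "r%i",  regno); break;  /* e.g. r8  */
    default: snprintf (buf, len, "?");           break;
    }
}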
13586 /* Locate some local-dynamic symbol still in use by this function
13587 so that we can print its name in some tls_local_dynamic_base
13588 pattern. */
13590 static int
13591 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13593 rtx x = *px;
13595 if (GET_CODE (x) == SYMBOL_REF
13596 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13598 cfun->machine->some_ld_name = XSTR (x, 0);
13599 return 1;
13602 return 0;
13605 static const char *
13606 get_some_local_dynamic_name (void)
13608 rtx insn;
13610 if (cfun->machine->some_ld_name)
13611 return cfun->machine->some_ld_name;
13613 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13614 if (NONDEBUG_INSN_P (insn)
13615 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13616 return cfun->machine->some_ld_name;
13618 return NULL;
13621 /* Meaning of CODE:
13622 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13623 C -- print opcode suffix for set/cmov insn.
13624 c -- like C, but print reversed condition
13625 F,f -- likewise, but for floating-point.
13626 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13627 otherwise nothing
13628 R -- print the prefix for register names.
13629 z -- print the opcode suffix for the size of the current operand.
13630 Z -- likewise, with special suffixes for x87 instructions.
13631 * -- print a star (in certain assembler syntax)
13632 A -- print an absolute memory reference.
13633 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13634 s -- print a shift double count, followed by the assembler's argument
13635 delimiter.
13636 b -- print the QImode name of the register for the indicated operand.
13637 %b0 would print %al if operands[0] is reg 0.
13638 w -- likewise, print the HImode name of the register.
13639 k -- likewise, print the SImode name of the register.
13640 q -- likewise, print the DImode name of the register.
13641 x -- likewise, print the V4SFmode name of the register.
13642 t -- likewise, print the V8SFmode name of the register.
13643 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13644 y -- print "st(0)" instead of "st" as a register.
13645 d -- print duplicated register operand for AVX instruction.
13646 D -- print condition for SSE cmp instruction.
13647 P -- if PIC, print an @PLT suffix.
13648 X -- don't print any sort of PIC '@' suffix for a symbol.
13649 & -- print some in-use local-dynamic symbol name.
13650 H -- print a memory address offset by 8; used for sse high-parts
13651 Y -- print condition for XOP pcom* instruction.
13652 + -- print a branch hint as 'cs' or 'ds' prefix
13653 ; -- print a semicolon (after prefixes due to bug in older gas).
13654 @ -- print a segment register of thread base pointer load
13657 void
13658 ix86_print_operand (FILE *file, rtx x, int code)
13660 if (code)
13662 switch (code)
13664 case '*':
13665 if (ASSEMBLER_DIALECT == ASM_ATT)
13666 putc ('*', file);
13667 return;
13669 case '&':
13671 const char *name = get_some_local_dynamic_name ();
13672 if (name == NULL)
13673 output_operand_lossage ("'%%&' used without any "
13674 "local dynamic TLS references");
13675 else
13676 assemble_name (file, name);
13677 return;
13680 case 'A':
13681 switch (ASSEMBLER_DIALECT)
13683 case ASM_ATT:
13684 putc ('*', file);
13685 break;
13687 case ASM_INTEL:
13688 /* Intel syntax. For absolute addresses, registers should not
13689 be surrounded by braces. */
13690 if (!REG_P (x))
13692 putc ('[', file);
13693 ix86_print_operand (file, x, 0);
13694 putc (']', file);
13695 return;
13697 break;
13699 default:
13700 gcc_unreachable ();
13703 ix86_print_operand (file, x, 0);
13704 return;
13707 case 'L':
13708 if (ASSEMBLER_DIALECT == ASM_ATT)
13709 putc ('l', file);
13710 return;
13712 case 'W':
13713 if (ASSEMBLER_DIALECT == ASM_ATT)
13714 putc ('w', file);
13715 return;
13717 case 'B':
13718 if (ASSEMBLER_DIALECT == ASM_ATT)
13719 putc ('b', file);
13720 return;
13722 case 'Q':
13723 if (ASSEMBLER_DIALECT == ASM_ATT)
13724 putc ('l', file);
13725 return;
13727 case 'S':
13728 if (ASSEMBLER_DIALECT == ASM_ATT)
13729 putc ('s', file);
13730 return;
13732 case 'T':
13733 if (ASSEMBLER_DIALECT == ASM_ATT)
13734 putc ('t', file);
13735 return;
13737 case 'z':
13738 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13740 /* Opcodes don't get size suffixes if using Intel syntax. */
13741 if (ASSEMBLER_DIALECT == ASM_INTEL)
13742 return;
13744 switch (GET_MODE_SIZE (GET_MODE (x)))
13746 case 1:
13747 putc ('b', file);
13748 return;
13750 case 2:
13751 putc ('w', file);
13752 return;
13754 case 4:
13755 putc ('l', file);
13756 return;
13758 case 8:
13759 putc ('q', file);
13760 return;
13762 default:
13763 output_operand_lossage
13764 ("invalid operand size for operand code '%c'", code);
13765 return;
13769 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13770 warning
13771 (0, "non-integer operand used with operand code '%c'", code);
13772 /* FALLTHRU */
13774 case 'Z':
13775 /* 387 opcodes don't get size suffixes if using Intel syntax. */
13776 if (ASSEMBLER_DIALECT == ASM_INTEL)
13777 return;
13779 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13781 switch (GET_MODE_SIZE (GET_MODE (x)))
13783 case 2:
13784 #ifdef HAVE_AS_IX86_FILDS
13785 putc ('s', file);
13786 #endif
13787 return;
13789 case 4:
13790 putc ('l', file);
13791 return;
13793 case 8:
13794 #ifdef HAVE_AS_IX86_FILDQ
13795 putc ('q', file);
13796 #else
13797 fputs ("ll", file);
13798 #endif
13799 return;
13801 default:
13802 break;
13805 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13807 /* 387 opcodes don't get size suffixes
13808 if the operands are registers. */
13809 if (STACK_REG_P (x))
13810 return;
13812 switch (GET_MODE_SIZE (GET_MODE (x)))
13814 case 4:
13815 putc ('s', file);
13816 return;
13818 case 8:
13819 putc ('l', file);
13820 return;
13822 case 12:
13823 case 16:
13824 putc ('t', file);
13825 return;
13827 default:
13828 break;
13831 else
13833 output_operand_lossage
13834 ("invalid operand type used with operand code '%c'", code);
13835 return;
13838 output_operand_lossage
13839 ("invalid operand size for operand code '%c'", code);
13840 return;
13842 case 'd':
13843 case 'b':
13844 case 'w':
13845 case 'k':
13846 case 'q':
13847 case 'h':
13848 case 't':
13849 case 'y':
13850 case 'x':
13851 case 'X':
13852 case 'P':
13853 break;
13855 case 's':
13856 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13858 ix86_print_operand (file, x, 0);
13859 fputs (", ", file);
13861 return;
13863 case 'D':
13864 /* A little bit of brain damage here. The SSE compare instructions
13865 use completely different names for the comparisons than the
13866 fp conditional moves do. */
13867 if (TARGET_AVX)
13869 switch (GET_CODE (x))
13871 case EQ:
13872 fputs ("eq", file);
13873 break;
13874 case UNEQ:
13875 fputs ("eq_us", file);
13876 break;
13877 case LT:
13878 fputs ("lt", file);
13879 break;
13880 case UNLT:
13881 fputs ("nge", file);
13882 break;
13883 case LE:
13884 fputs ("le", file);
13885 break;
13886 case UNLE:
13887 fputs ("ngt", file);
13888 break;
13889 case UNORDERED:
13890 fputs ("unord", file);
13891 break;
13892 case NE:
13893 fputs ("neq", file);
13894 break;
13895 case LTGT:
13896 fputs ("neq_oq", file);
13897 break;
13898 case GE:
13899 fputs ("ge", file);
13900 break;
13901 case UNGE:
13902 fputs ("nlt", file);
13903 break;
13904 case GT:
13905 fputs ("gt", file);
13906 break;
13907 case UNGT:
13908 fputs ("nle", file);
13909 break;
13910 case ORDERED:
13911 fputs ("ord", file);
13912 break;
13913 default:
13914 output_operand_lossage ("operand is not a condition code, "
13915 "invalid operand code 'D'");
13916 return;
13919 else
13921 switch (GET_CODE (x))
13923 case EQ:
13924 case UNEQ:
13925 fputs ("eq", file);
13926 break;
13927 case LT:
13928 case UNLT:
13929 fputs ("lt", file);
13930 break;
13931 case LE:
13932 case UNLE:
13933 fputs ("le", file);
13934 break;
13935 case UNORDERED:
13936 fputs ("unord", file);
13937 break;
13938 case NE:
13939 case LTGT:
13940 fputs ("neq", file);
13941 break;
13942 case UNGE:
13943 case GE:
13944 fputs ("nlt", file);
13945 break;
13946 case UNGT:
13947 case GT:
13948 fputs ("nle", file);
13949 break;
13950 case ORDERED:
13951 fputs ("ord", file);
13952 break;
13953 default:
13954 output_operand_lossage ("operand is not a condition code, "
13955 "invalid operand code 'D'");
13956 return;
13959 return;
13960 case 'O':
13961 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13962 if (ASSEMBLER_DIALECT == ASM_ATT)
13964 switch (GET_MODE (x))
13966 case HImode: putc ('w', file); break;
13967 case SImode:
13968 case SFmode: putc ('l', file); break;
13969 case DImode:
13970 case DFmode: putc ('q', file); break;
13971 default: gcc_unreachable ();
13973 putc ('.', file);
13975 #endif
13976 return;
13977 case 'C':
13978 if (!COMPARISON_P (x))
13980 output_operand_lossage ("operand is neither a constant nor a "
13981 "condition code, invalid operand code "
13982 "'C'");
13983 return;
13985 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13986 return;
13987 case 'F':
13988 if (!COMPARISON_P (x))
13990 output_operand_lossage ("operand is neither a constant nor a "
13991 "condition code, invalid operand code "
13992 "'F'");
13993 return;
13995 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13996 if (ASSEMBLER_DIALECT == ASM_ATT)
13997 putc ('.', file);
13998 #endif
13999 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14000 return;
14002 /* Like above, but reverse condition */
14003 case 'c':
14004 /* Check to see if argument to %c is really a constant
14005 and not a condition code which needs to be reversed. */
14006 if (!COMPARISON_P (x))
14008 output_operand_lossage ("operand is neither a constant nor a "
14009 "condition code, invalid operand "
14010 "code 'c'");
14011 return;
14013 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14014 return;
14015 case 'f':
14016 if (!COMPARISON_P (x))
14018 output_operand_lossage ("operand is neither a constant nor a "
14019 "condition code, invalid operand "
14020 "code 'f'");
14021 return;
14023 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14024 if (ASSEMBLER_DIALECT == ASM_ATT)
14025 putc ('.', file);
14026 #endif
14027 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14028 return;
14030 case 'H':
14031 /* It doesn't actually matter what mode we use here, as we're
14032 only going to use this for printing. */
14033 x = adjust_address_nv (x, DImode, 8);
14034 break;
14036 case '+':
14038 rtx x;
14040 if (!optimize
14041 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14042 return;
14044 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14045 if (x)
14047 int pred_val = INTVAL (XEXP (x, 0));
14049 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14050 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14052 int taken = pred_val > REG_BR_PROB_BASE / 2;
14053 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14055 /* Emit hints only when the default branch prediction
14056 heuristics would fail. */
14057 if (taken != cputaken)
14059 /* We use 3e (DS) prefix for taken branches and
14060 2e (CS) prefix for not taken branches. */
14061 if (taken)
14062 fputs ("ds ; ", file);
14063 else
14064 fputs ("cs ; ", file);
14068 return;
14071 case 'Y':
14072 switch (GET_CODE (x))
14074 case NE:
14075 fputs ("neq", file);
14076 break;
14077 case EQ:
14078 fputs ("eq", file);
14079 break;
14080 case GE:
14081 case GEU:
14082 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14083 break;
14084 case GT:
14085 case GTU:
14086 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14087 break;
14088 case LE:
14089 case LEU:
14090 fputs ("le", file);
14091 break;
14092 case LT:
14093 case LTU:
14094 fputs ("lt", file);
14095 break;
14096 case UNORDERED:
14097 fputs ("unord", file);
14098 break;
14099 case ORDERED:
14100 fputs ("ord", file);
14101 break;
14102 case UNEQ:
14103 fputs ("ueq", file);
14104 break;
14105 case UNGE:
14106 fputs ("nlt", file);
14107 break;
14108 case UNGT:
14109 fputs ("nle", file);
14110 break;
14111 case UNLE:
14112 fputs ("ule", file);
14113 break;
14114 case UNLT:
14115 fputs ("ult", file);
14116 break;
14117 case LTGT:
14118 fputs ("une", file);
14119 break;
14120 default:
14121 output_operand_lossage ("operand is not a condition code, "
14122 "invalid operand code 'Y'");
14123 return;
14125 return;
14127 case ';':
14128 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14129 putc (';', file);
14130 #endif
14131 return;
14133 case '@':
14134 if (ASSEMBLER_DIALECT == ASM_ATT)
14135 putc ('%', file);
14137 /* The kernel uses a different segment register for performance
14138 reasons; that way a system call does not have to trash the userspace
14139 segment register, which would be expensive. */
14140 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14141 fputs ("fs", file);
14142 else
14143 fputs ("gs", file);
14144 return;
14146 default:
14147 output_operand_lossage ("invalid operand code '%c'", code);
14151 if (REG_P (x))
14152 print_reg (x, code, file);
14154 else if (MEM_P (x))
14156 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14157 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14158 && GET_MODE (x) != BLKmode)
14160 const char * size;
14161 switch (GET_MODE_SIZE (GET_MODE (x)))
14163 case 1: size = "BYTE"; break;
14164 case 2: size = "WORD"; break;
14165 case 4: size = "DWORD"; break;
14166 case 8: size = "QWORD"; break;
14167 case 12: size = "TBYTE"; break;
14168 case 16:
14169 if (GET_MODE (x) == XFmode)
14170 size = "TBYTE";
14171 else
14172 size = "XMMWORD";
14173 break;
14174 case 32: size = "YMMWORD"; break;
14175 default:
14176 gcc_unreachable ();
14179 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14180 if (code == 'b')
14181 size = "BYTE";
14182 else if (code == 'w')
14183 size = "WORD";
14184 else if (code == 'k')
14185 size = "DWORD";
14187 fputs (size, file);
14188 fputs (" PTR ", file);
14191 x = XEXP (x, 0);
14192 /* Avoid (%rip) for call operands. */
14193 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14194 && !CONST_INT_P (x))
14195 output_addr_const (file, x);
14196 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14197 output_operand_lossage ("invalid constraints for operand");
14198 else
14199 output_address (x);
14202 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14204 REAL_VALUE_TYPE r;
14205 long l;
14207 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14208 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14210 if (ASSEMBLER_DIALECT == ASM_ATT)
14211 putc ('$', file);
14212 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14213 if (code == 'q')
14214 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14215 else
14216 fprintf (file, "0x%08x", (unsigned int) l);
14219 /* These float cases don't actually occur as immediate operands. */
14220 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14222 char dstr[30];
14224 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14225 fputs (dstr, file);
14228 else if (GET_CODE (x) == CONST_DOUBLE
14229 && GET_MODE (x) == XFmode)
14231 char dstr[30];
14233 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14234 fputs (dstr, file);
14237 else
14239 /* We have patterns that allow zero sets of memory, for instance.
14240 In 64-bit mode, we should probably support all 8-byte vectors,
14241 since we can in fact encode that into an immediate. */
14242 if (GET_CODE (x) == CONST_VECTOR)
14244 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14245 x = const0_rtx;
14248 if (code != 'P')
14250 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14252 if (ASSEMBLER_DIALECT == ASM_ATT)
14253 putc ('$', file);
14255 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14256 || GET_CODE (x) == LABEL_REF)
14258 if (ASSEMBLER_DIALECT == ASM_ATT)
14259 putc ('$', file);
14260 else
14261 fputs ("OFFSET FLAT:", file);
14264 if (CONST_INT_P (x))
14265 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14266 else if (flag_pic || MACHOPIC_INDIRECT)
14267 output_pic_addr_const (file, x, code);
14268 else
14269 output_addr_const (file, x);
14273 static bool
14274 ix86_print_operand_punct_valid_p (unsigned char code)
14276 return (code == '@' || code == '*' || code == '+'
14277 || code == '&' || code == ';');
14280 /* Print a memory operand whose address is ADDR. */
14282 static void
14283 ix86_print_operand_address (FILE *file, rtx addr)
14285 struct ix86_address parts;
14286 rtx base, index, disp;
14287 int scale;
14288 int ok = ix86_decompose_address (addr, &parts);
14290 gcc_assert (ok);
14292 base = parts.base;
14293 index = parts.index;
14294 disp = parts.disp;
14295 scale = parts.scale;
14297 switch (parts.seg)
14299 case SEG_DEFAULT:
14300 break;
14301 case SEG_FS:
14302 case SEG_GS:
14303 if (ASSEMBLER_DIALECT == ASM_ATT)
14304 putc ('%', file);
14305 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14306 break;
14307 default:
14308 gcc_unreachable ();
14311 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
14312 if (TARGET_64BIT && !base && !index)
14314 rtx symbol = disp;
14316 if (GET_CODE (disp) == CONST
14317 && GET_CODE (XEXP (disp, 0)) == PLUS
14318 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14319 symbol = XEXP (XEXP (disp, 0), 0);
14321 if (GET_CODE (symbol) == LABEL_REF
14322 || (GET_CODE (symbol) == SYMBOL_REF
14323 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14324 base = pc_rtx;
14326 if (!base && !index)
14328 /* A displacement-only address requires special attention. */
14330 if (CONST_INT_P (disp))
14332 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14333 fputs ("ds:", file);
14334 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14336 else if (flag_pic)
14337 output_pic_addr_const (file, disp, 0);
14338 else
14339 output_addr_const (file, disp);
14341 else
14343 if (ASSEMBLER_DIALECT == ASM_ATT)
14345 if (disp)
14347 if (flag_pic)
14348 output_pic_addr_const (file, disp, 0);
14349 else if (GET_CODE (disp) == LABEL_REF)
14350 output_asm_label (disp);
14351 else
14352 output_addr_const (file, disp);
14355 putc ('(', file);
14356 if (base)
14357 print_reg (base, 0, file);
14358 if (index)
14360 putc (',', file);
14361 print_reg (index, 0, file);
14362 if (scale != 1)
14363 fprintf (file, ",%d", scale);
14365 putc (')', file);
14367 else
14369 rtx offset = NULL_RTX;
14371 if (disp)
14373 /* Pull out the offset of a symbol; print any symbol itself. */
14374 if (GET_CODE (disp) == CONST
14375 && GET_CODE (XEXP (disp, 0)) == PLUS
14376 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14378 offset = XEXP (XEXP (disp, 0), 1);
14379 disp = gen_rtx_CONST (VOIDmode,
14380 XEXP (XEXP (disp, 0), 0));
14383 if (flag_pic)
14384 output_pic_addr_const (file, disp, 0);
14385 else if (GET_CODE (disp) == LABEL_REF)
14386 output_asm_label (disp);
14387 else if (CONST_INT_P (disp))
14388 offset = disp;
14389 else
14390 output_addr_const (file, disp);
14393 putc ('[', file);
14394 if (base)
14396 print_reg (base, 0, file);
14397 if (offset)
14399 if (INTVAL (offset) >= 0)
14400 putc ('+', file);
14401 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14404 else if (offset)
14405 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14406 else
14407 putc ('0', file);
14409 if (index)
14411 putc ('+', file);
14412 print_reg (index, 0, file);
14413 if (scale != 1)
14414 fprintf (file, "*%d", scale);
14416 putc (']', file);
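
/* Illustrative standalone sketch (not compiler code): the two address
   syntaxes produced above for a base + index*scale + displacement
   operand.  The function name is hypothetical; the layouts follow the
   AT&T and Intel branches of ix86_print_operand_address above.  */
#include <stdio.h>

static void
format_sib_address (char *buf, size_t len, int att_syntax,
                    const char *base, const char *index,
                    int scale, long disp)
{
  if (att_syntax)
    /* AT&T: disp(%base,%index,scale), e.g. "8(%ebx,%ecx,4)".  */
    snprintf (buf, len, "%ld(%%%s,%%%s,%d)", disp, base, index, scale);
  else
    /* Intel: [base+index*scale+disp], e.g. "[ebx+ecx*4+8]".  */
    snprintf (buf, len, "[%s+%s*%d+%ld]", base, index, scale, disp);
}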
14421 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14423 static bool
14424 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14426 rtx op;
14428 if (GET_CODE (x) != UNSPEC)
14429 return false;
14431 op = XVECEXP (x, 0, 0);
14432 switch (XINT (x, 1))
14434 case UNSPEC_GOTTPOFF:
14435 output_addr_const (file, op);
14436 /* FIXME: This might be @TPOFF in Sun ld. */
14437 fputs ("@gottpoff", file);
14438 break;
14439 case UNSPEC_TPOFF:
14440 output_addr_const (file, op);
14441 fputs ("@tpoff", file);
14442 break;
14443 case UNSPEC_NTPOFF:
14444 output_addr_const (file, op);
14445 if (TARGET_64BIT)
14446 fputs ("@tpoff", file);
14447 else
14448 fputs ("@ntpoff", file);
14449 break;
14450 case UNSPEC_DTPOFF:
14451 output_addr_const (file, op);
14452 fputs ("@dtpoff", file);
14453 break;
14454 case UNSPEC_GOTNTPOFF:
14455 output_addr_const (file, op);
14456 if (TARGET_64BIT)
14457 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14458 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14459 else
14460 fputs ("@gotntpoff", file);
14461 break;
14462 case UNSPEC_INDNTPOFF:
14463 output_addr_const (file, op);
14464 fputs ("@indntpoff", file);
14465 break;
14466 #if TARGET_MACHO
14467 case UNSPEC_MACHOPIC_OFFSET:
14468 output_addr_const (file, op);
14469 putc ('-', file);
14470 machopic_output_function_base_name (file);
14471 break;
14472 #endif
14474 case UNSPEC_STACK_CHECK:
14476 int offset;
14478 gcc_assert (flag_split_stack);
14480 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14481 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14482 #else
14483 gcc_unreachable ();
14484 #endif
14486 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14488 break;
14490 default:
14491 return false;
14494 return true;
14497 /* Split one or more double-mode RTL references into pairs of half-mode
14498 references. The RTL can be REG, offsettable MEM, integer constant, or
14499 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14500 split and "num" is its length. lo_half and hi_half are output arrays
14501 that parallel "operands". */
14503 void
14504 split_double_mode (enum machine_mode mode, rtx operands[],
14505 int num, rtx lo_half[], rtx hi_half[])
14507 enum machine_mode half_mode;
14508 unsigned int byte;
14510 switch (mode)
14512 case TImode:
14513 half_mode = DImode;
14514 break;
14515 case DImode:
14516 half_mode = SImode;
14517 break;
14518 default:
14519 gcc_unreachable ();
14522 byte = GET_MODE_SIZE (half_mode);
14524 while (num--)
14526 rtx op = operands[num];
14528 /* simplify_subreg refuses to split volatile memory addresses,
14529 but we still have to handle them. */
14530 if (MEM_P (op))
14532 lo_half[num] = adjust_address (op, half_mode, 0);
14533 hi_half[num] = adjust_address (op, half_mode, byte);
14535 else
14537 lo_half[num] = simplify_gen_subreg (half_mode, op,
14538 GET_MODE (op) == VOIDmode
14539 ? mode : GET_MODE (op), 0);
14540 hi_half[num] = simplify_gen_subreg (half_mode, op,
14541 GET_MODE (op) == VOIDmode
14542 ? mode : GET_MODE (op), byte);
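
/* Illustrative standalone sketch (not compiler code): splitting a
   double-word constant into its two half-word parts, the constant
   analogue of what split_double_mode does above for DImode operands on
   a 32-bit target (little endian, so the low half comes first).  */
#include <stdint.h>

static void
split_di_constant (uint64_t value, uint32_t *lo_half, uint32_t *hi_half)
{
  *lo_half = (uint32_t) (value & 0xffffffffu);
  *hi_half = (uint32_t) (value >> 32);
}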
14547 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14548 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14549 is the expression of the binary operation. The output may either be
14550 emitted here, or returned to the caller, like all output_* functions.
14552 There is no guarantee that the operands are the same mode, as they
14553 might be within FLOAT or FLOAT_EXTEND expressions. */
14555 #ifndef SYSV386_COMPAT
14556 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14557 wants to fix the assemblers because that causes incompatibility
14558 with gcc. No-one wants to fix gcc because that causes
14559 incompatibility with assemblers... You can use the option of
14560 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14561 #define SYSV386_COMPAT 1
14562 #endif
14564 const char *
14565 output_387_binary_op (rtx insn, rtx *operands)
14567 static char buf[40];
14568 const char *p;
14569 const char *ssep;
14570 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14572 #ifdef ENABLE_CHECKING
14573 /* Even if we do not want to check the inputs, this documents the input
14574 constraints, which helps in understanding the following code. */
14575 if (STACK_REG_P (operands[0])
14576 && ((REG_P (operands[1])
14577 && REGNO (operands[0]) == REGNO (operands[1])
14578 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14579 || (REG_P (operands[2])
14580 && REGNO (operands[0]) == REGNO (operands[2])
14581 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14582 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14583 ; /* ok */
14584 else
14585 gcc_assert (is_sse);
14586 #endif
14588 switch (GET_CODE (operands[3]))
14590 case PLUS:
14591 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14592 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14593 p = "fiadd";
14594 else
14595 p = "fadd";
14596 ssep = "vadd";
14597 break;
14599 case MINUS:
14600 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14601 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14602 p = "fisub";
14603 else
14604 p = "fsub";
14605 ssep = "vsub";
14606 break;
14608 case MULT:
14609 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14610 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14611 p = "fimul";
14612 else
14613 p = "fmul";
14614 ssep = "vmul";
14615 break;
14617 case DIV:
14618 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14619 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14620 p = "fidiv";
14621 else
14622 p = "fdiv";
14623 ssep = "vdiv";
14624 break;
14626 default:
14627 gcc_unreachable ();
14630 if (is_sse)
14632 if (TARGET_AVX)
14634 strcpy (buf, ssep);
14635 if (GET_MODE (operands[0]) == SFmode)
14636 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14637 else
14638 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14640 else
14642 strcpy (buf, ssep + 1);
14643 if (GET_MODE (operands[0]) == SFmode)
14644 strcat (buf, "ss\t{%2, %0|%0, %2}");
14645 else
14646 strcat (buf, "sd\t{%2, %0|%0, %2}");
14648 return buf;
14650 strcpy (buf, p);
14652 switch (GET_CODE (operands[3]))
14654 case MULT:
14655 case PLUS:
14656 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14658 rtx temp = operands[2];
14659 operands[2] = operands[1];
14660 operands[1] = temp;
14663 /* We know operands[0] == operands[1]. */
14665 if (MEM_P (operands[2]))
14667 p = "%Z2\t%2";
14668 break;
14671 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14673 if (STACK_TOP_P (operands[0]))
14674 /* How is it that we are storing to a dead operand[2]?
14675 Well, presumably operands[1] is dead too. We can't
14676 store the result to st(0) as st(0) gets popped on this
14677 instruction. Instead store to operands[2] (which I
14678 think has to be st(1)). st(1) will be popped later.
14679 gcc <= 2.8.1 didn't have this check and generated
14680 assembly code that the Unixware assembler rejected. */
14681 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14682 else
14683 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14684 break;
14687 if (STACK_TOP_P (operands[0]))
14688 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14689 else
14690 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14691 break;
14693 case MINUS:
14694 case DIV:
14695 if (MEM_P (operands[1]))
14697 p = "r%Z1\t%1";
14698 break;
14701 if (MEM_P (operands[2]))
14703 p = "%Z2\t%2";
14704 break;
14707 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14709 #if SYSV386_COMPAT
14710 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14711 derived assemblers, confusingly reverse the direction of
14712 the operation for fsub{r} and fdiv{r} when the
14713 destination register is not st(0). The Intel assembler
14714 doesn't have this brain damage. Read !SYSV386_COMPAT to
14715 figure out what the hardware really does. */
14716 if (STACK_TOP_P (operands[0]))
14717 p = "{p\t%0, %2|rp\t%2, %0}";
14718 else
14719 p = "{rp\t%2, %0|p\t%0, %2}";
14720 #else
14721 if (STACK_TOP_P (operands[0]))
14722 /* As above for fmul/fadd, we can't store to st(0). */
14723 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14724 else
14725 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14726 #endif
14727 break;
14730 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14732 #if SYSV386_COMPAT
14733 if (STACK_TOP_P (operands[0]))
14734 p = "{rp\t%0, %1|p\t%1, %0}";
14735 else
14736 p = "{p\t%1, %0|rp\t%0, %1}";
14737 #else
14738 if (STACK_TOP_P (operands[0]))
14739 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14740 else
14741 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14742 #endif
14743 break;
14746 if (STACK_TOP_P (operands[0]))
14748 if (STACK_TOP_P (operands[1]))
14749 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14750 else
14751 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14752 break;
14754 else if (STACK_TOP_P (operands[1]))
14756 #if SYSV386_COMPAT
14757 p = "{\t%1, %0|r\t%0, %1}";
14758 #else
14759 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14760 #endif
14762 else
14764 #if SYSV386_COMPAT
14765 p = "{r\t%2, %0|\t%0, %2}";
14766 #else
14767 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14768 #endif
14770 break;
14772 default:
14773 gcc_unreachable ();
14776 strcat (buf, p);
14777 return buf;
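
/* Illustrative standalone sketch (not compiler code) of how the SSE
   template string is assembled above: the mnemonic stem ("vadd", "vsub",
   "vmul" or "vdiv") plus an "ss"/"sd" width suffix, with a three-operand
   layout for AVX and a two-operand layout otherwise.  The function name
   is hypothetical; the string fragments are the ones used above, and the
   caller is assumed to supply a large enough buffer.  */
#include <string.h>

static void
build_sse_binop_template (char *buf, const char *stem /* e.g. "vadd" */,
                          int is_single, int use_avx)
{
  /* Non-AVX drops the leading 'v' from the stem.  */
  strcpy (buf, use_avx ? stem : stem + 1);
  strcat (buf, is_single ? "ss" : "sd");
  strcat (buf, use_avx
               ? "\t{%2, %1, %0|%0, %1, %2}"
               : "\t{%2, %0|%0, %2}");
}

/* build_sse_binop_template (buf, "vadd", 1, 1) yields
   "vaddss\t{%2, %1, %0|%0, %1, %2}", matching the AVX branch above.  */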
14780 /* Return needed mode for entity in optimize_mode_switching pass. */
14783 ix86_mode_needed (int entity, rtx insn)
14785 enum attr_i387_cw mode;
14787 /* The mode UNINITIALIZED is used to store the control word after a
14788 function call or ASM pattern. The mode ANY specifies that the function
14789 has no requirements on the control word and makes no changes to the
14790 bits we are interested in. */
14792 if (CALL_P (insn)
14793 || (NONJUMP_INSN_P (insn)
14794 && (asm_noperands (PATTERN (insn)) >= 0
14795 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14796 return I387_CW_UNINITIALIZED;
14798 if (recog_memoized (insn) < 0)
14799 return I387_CW_ANY;
14801 mode = get_attr_i387_cw (insn);
14803 switch (entity)
14805 case I387_TRUNC:
14806 if (mode == I387_CW_TRUNC)
14807 return mode;
14808 break;
14810 case I387_FLOOR:
14811 if (mode == I387_CW_FLOOR)
14812 return mode;
14813 break;
14815 case I387_CEIL:
14816 if (mode == I387_CW_CEIL)
14817 return mode;
14818 break;
14820 case I387_MASK_PM:
14821 if (mode == I387_CW_MASK_PM)
14822 return mode;
14823 break;
14825 default:
14826 gcc_unreachable ();
14829 return I387_CW_ANY;
14832 /* Output code to initialize control word copies used by trunc?f?i and
14833 rounding patterns. CURRENT_MODE is set to the current control word,
14834 while NEW_MODE is set to the new control word. */
14836 void
14837 emit_i387_cw_initialization (int mode)
14839 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14840 rtx new_mode;
14842 enum ix86_stack_slot slot;
14844 rtx reg = gen_reg_rtx (HImode);
14846 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14847 emit_move_insn (reg, copy_rtx (stored_mode));
14849 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14850 || optimize_function_for_size_p (cfun))
14852 switch (mode)
14854 case I387_CW_TRUNC:
14855 /* round toward zero (truncate) */
14856 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14857 slot = SLOT_CW_TRUNC;
14858 break;
14860 case I387_CW_FLOOR:
14861 /* round down toward -oo */
14862 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14863 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14864 slot = SLOT_CW_FLOOR;
14865 break;
14867 case I387_CW_CEIL:
14868 /* round up toward +oo */
14869 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14870 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14871 slot = SLOT_CW_CEIL;
14872 break;
14874 case I387_CW_MASK_PM:
14875 /* mask precision exception for nearbyint() */
14876 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14877 slot = SLOT_CW_MASK_PM;
14878 break;
14880 default:
14881 gcc_unreachable ();
14884 else
14886 switch (mode)
14888 case I387_CW_TRUNC:
14889 /* round toward zero (truncate) */
14890 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14891 slot = SLOT_CW_TRUNC;
14892 break;
14894 case I387_CW_FLOOR:
14895 /* round down toward -oo */
14896 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14897 slot = SLOT_CW_FLOOR;
14898 break;
14900 case I387_CW_CEIL:
14901 /* round up toward +oo */
14902 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14903 slot = SLOT_CW_CEIL;
14904 break;
14906 case I387_CW_MASK_PM:
14907 /* mask precision exception for nearbyint() */
14908 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14909 slot = SLOT_CW_MASK_PM;
14910 break;
14912 default:
14913 gcc_unreachable ();
14917 gcc_assert (slot < MAX_386_STACK_LOCALS);
14919 new_mode = assign_386_stack_local (HImode, slot);
14920 emit_move_insn (new_mode, reg);
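
/* Illustrative standalone sketch (not compiler code): the x87 control
   word values computed above.  Bits 10-11 select the rounding mode and
   bit 5 masks the precision exception; the constants match the GEN_INT
   values used in the first (size-optimized) branch above.  Names here
   are hypothetical.  */

enum i387_round_mode { CW_TRUNC, CW_FLOOR, CW_CEIL, CW_MASK_PM };

static unsigned short
i387_cw_for_mode (unsigned short cw, enum i387_round_mode mode)
{
  switch (mode)
    {
    case CW_TRUNC:   return cw | 0x0c00;              /* round toward zero */
    case CW_FLOOR:   return (cw & ~0x0c00) | 0x0400;  /* round down toward -oo */
    case CW_CEIL:    return (cw & ~0x0c00) | 0x0800;  /* round up toward +oo */
    case CW_MASK_PM: return cw | 0x0020;              /* mask precision exception */
    }
  return cw;
}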
14923 /* Output code for INSN to convert a float to a signed int. OPERANDS
14924 are the insn operands. The output may be [HSD]Imode and the input
14925 operand may be [SDX]Fmode. */
14927 const char *
14928 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
14930 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14931 int dimode_p = GET_MODE (operands[0]) == DImode;
14932 int round_mode = get_attr_i387_cw (insn);
14934 /* Jump through a hoop or two for DImode, since the hardware has no
14935 non-popping instruction. We used to do this a different way, but
14936 that was somewhat fragile and broke with post-reload splitters. */
14937 if ((dimode_p || fisttp) && !stack_top_dies)
14938 output_asm_insn ("fld\t%y1", operands);
14940 gcc_assert (STACK_TOP_P (operands[1]));
14941 gcc_assert (MEM_P (operands[0]));
14942 gcc_assert (GET_MODE (operands[1]) != TFmode);
14944 if (fisttp)
14945 output_asm_insn ("fisttp%Z0\t%0", operands);
14946 else
14948 if (round_mode != I387_CW_ANY)
14949 output_asm_insn ("fldcw\t%3", operands);
14950 if (stack_top_dies || dimode_p)
14951 output_asm_insn ("fistp%Z0\t%0", operands);
14952 else
14953 output_asm_insn ("fist%Z0\t%0", operands);
14954 if (round_mode != I387_CW_ANY)
14955 output_asm_insn ("fldcw\t%2", operands);
14958 return "";
14961 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14962 have the values zero or one, indicates the ffreep insn's operand
14963 from the OPERANDS array. */
14965 static const char *
14966 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14968 if (TARGET_USE_FFREEP)
14969 #ifdef HAVE_AS_IX86_FFREEP
14970 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14971 #else
14973 static char retval[32];
14974 int regno = REGNO (operands[opno]);
14976 gcc_assert (FP_REGNO_P (regno));
14978 regno -= FIRST_STACK_REG;
14980 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14981 return retval;
14983 #endif
14985 return opno ? "fstp\t%y1" : "fstp\t%y0";
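
/* Illustrative standalone sketch (not compiler code): the raw encoding
   emitted above when the assembler does not understand "ffreep".  The
   instruction ffreep %st(N) is the two bytes 0xDF, 0xC0+N, which the
   snprintf above emits as the 16-bit little-endian word 0xc<N>df.
   The function name is hypothetical.  */

static unsigned short
ffreep_encoding (int stack_regno /* 0..7, i.e. %st(N) */)
{
  /* Low byte 0xDF, high byte 0xC0 + N; stored little endian this is
     exactly the ".short 0xc<N>df" emitted above.  */
  return (unsigned short) (((0xc0 + stack_regno) << 8) | 0xdf);
}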
14989 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14990 should be used. UNORDERED_P is true when fucom should be used. */
14992 const char *
14993 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
14995 int stack_top_dies;
14996 rtx cmp_op0, cmp_op1;
14997 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14999 if (eflags_p)
15001 cmp_op0 = operands[0];
15002 cmp_op1 = operands[1];
15004 else
15006 cmp_op0 = operands[1];
15007 cmp_op1 = operands[2];
15010 if (is_sse)
15012 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
15013 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
15014 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
15015 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
15017 if (GET_MODE (operands[0]) == SFmode)
15018 if (unordered_p)
15019 return &ucomiss[TARGET_AVX ? 0 : 1];
15020 else
15021 return &comiss[TARGET_AVX ? 0 : 1];
15022 else
15023 if (unordered_p)
15024 return &ucomisd[TARGET_AVX ? 0 : 1];
15025 else
15026 return &comisd[TARGET_AVX ? 0 : 1];
15029 gcc_assert (STACK_TOP_P (cmp_op0));
15031 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15033 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15035 if (stack_top_dies)
15037 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15038 return output_387_ffreep (operands, 1);
15040 else
15041 return "ftst\n\tfnstsw\t%0";
15044 if (STACK_REG_P (cmp_op1)
15045 && stack_top_dies
15046 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15047 && REGNO (cmp_op1) != FIRST_STACK_REG)
15049 /* If the top of the 387 stack dies, and the other operand
15050 is also a stack register that dies, then this must be an
15051 `fcompp' float compare. */
15053 if (eflags_p)
15055 /* There is no double-popping fcomi variant. Fortunately,
15056 eflags is immune to the fstp's cc clobbering. */
15057 if (unordered_p)
15058 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15059 else
15060 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15061 return output_387_ffreep (operands, 0);
15063 else
15065 if (unordered_p)
15066 return "fucompp\n\tfnstsw\t%0";
15067 else
15068 return "fcompp\n\tfnstsw\t%0";
15071 else
15073 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15075 static const char * const alt[16] =
15077 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15078 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15079 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15080 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15082 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15083 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15084 NULL,
15085 NULL,
15087 "fcomi\t{%y1, %0|%0, %y1}",
15088 "fcomip\t{%y1, %0|%0, %y1}",
15089 "fucomi\t{%y1, %0|%0, %y1}",
15090 "fucomip\t{%y1, %0|%0, %y1}",
15092 NULL,
15093 NULL,
15094 NULL,
15095 NULL
15098 int mask;
15099 const char *ret;
15101 mask = eflags_p << 3;
15102 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15103 mask |= unordered_p << 1;
15104 mask |= stack_top_dies;
15106 gcc_assert (mask < 16);
15107 ret = alt[mask];
15108 gcc_assert (ret);
15110 return ret;
15114 void
15115 ix86_output_addr_vec_elt (FILE *file, int value)
15117 const char *directive = ASM_LONG;
15119 #ifdef ASM_QUAD
15120 if (TARGET_64BIT)
15121 directive = ASM_QUAD;
15122 #else
15123 gcc_assert (!TARGET_64BIT);
15124 #endif
15126 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15129 void
15130 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15132 const char *directive = ASM_LONG;
15134 #ifdef ASM_QUAD
15135 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15136 directive = ASM_QUAD;
15137 #else
15138 gcc_assert (!TARGET_64BIT);
15139 #endif
15140 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15141 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15142 fprintf (file, "%s%s%d-%s%d\n",
15143 directive, LPREFIX, value, LPREFIX, rel);
15144 else if (HAVE_AS_GOTOFF_IN_DATA)
15145 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15146 #if TARGET_MACHO
15147 else if (TARGET_MACHO)
15149 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15150 machopic_output_function_base_name (file);
15151 putc ('\n', file);
15153 #endif
15154 else
15155 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15156 GOT_SYMBOL_NAME, LPREFIX, value);
15159 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15160 for the target. */
15162 void
15163 ix86_expand_clear (rtx dest)
15165 rtx tmp;
15167 /* We play register width games, which are only valid after reload. */
15168 gcc_assert (reload_completed);
15170 /* Avoid HImode and its attendant prefix byte. */
15171 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15172 dest = gen_rtx_REG (SImode, REGNO (dest));
15173 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15175 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15176 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15178 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15179 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
15182 emit_insn (tmp);
15185 /* X is an unchanging MEM. If it is a constant pool reference, return
15186 the constant pool rtx, else NULL. */
15189 maybe_get_pool_constant (rtx x)
15191 x = ix86_delegitimize_address (XEXP (x, 0));
15193 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15194 return get_pool_constant (x);
15196 return NULL_RTX;
15199 void
15200 ix86_expand_move (enum machine_mode mode, rtx operands[])
15202 rtx op0, op1;
15203 enum tls_model model;
15205 op0 = operands[0];
15206 op1 = operands[1];
15208 if (GET_CODE (op1) == SYMBOL_REF)
15210 model = SYMBOL_REF_TLS_MODEL (op1);
15211 if (model)
15213 op1 = legitimize_tls_address (op1, model, true);
15214 op1 = force_operand (op1, op0);
15215 if (op1 == op0)
15216 return;
15218 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15219 && SYMBOL_REF_DLLIMPORT_P (op1))
15220 op1 = legitimize_dllimport_symbol (op1, false);
15222 else if (GET_CODE (op1) == CONST
15223 && GET_CODE (XEXP (op1, 0)) == PLUS
15224 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15226 rtx addend = XEXP (XEXP (op1, 0), 1);
15227 rtx symbol = XEXP (XEXP (op1, 0), 0);
15228 rtx tmp = NULL;
15230 model = SYMBOL_REF_TLS_MODEL (symbol);
15231 if (model)
15232 tmp = legitimize_tls_address (symbol, model, true);
15233 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15234 && SYMBOL_REF_DLLIMPORT_P (symbol))
15235 tmp = legitimize_dllimport_symbol (symbol, true);
15237 if (tmp)
15239 tmp = force_operand (tmp, NULL);
15240 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15241 op0, 1, OPTAB_DIRECT);
15242 if (tmp == op0)
15243 return;
15247 if ((flag_pic || MACHOPIC_INDIRECT)
15248 && mode == Pmode && symbolic_operand (op1, Pmode))
15250 if (TARGET_MACHO && !TARGET_64BIT)
15252 #if TARGET_MACHO
15253 /* dynamic-no-pic */
15254 if (MACHOPIC_INDIRECT)
15256 rtx temp = ((reload_in_progress
15257 || ((op0 && REG_P (op0))
15258 && mode == Pmode))
15259 ? op0 : gen_reg_rtx (Pmode));
15260 op1 = machopic_indirect_data_reference (op1, temp);
15261 if (MACHOPIC_PURE)
15262 op1 = machopic_legitimize_pic_address (op1, mode,
15263 temp == op1 ? 0 : temp);
15265 if (op0 != op1 && GET_CODE (op0) != MEM)
15267 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15268 emit_insn (insn);
15269 return;
15271 if (GET_CODE (op0) == MEM)
15272 op1 = force_reg (Pmode, op1);
15273 else
15275 rtx temp = op0;
15276 if (GET_CODE (temp) != REG)
15277 temp = gen_reg_rtx (Pmode);
15278 temp = legitimize_pic_address (op1, temp);
15279 if (temp == op0)
15280 return;
15281 op1 = temp;
15283 /* dynamic-no-pic */
15284 #endif
15286 else
15288 if (MEM_P (op0))
15289 op1 = force_reg (Pmode, op1);
15290 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
15292 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15293 op1 = legitimize_pic_address (op1, reg);
15294 if (op0 == op1)
15295 return;
15299 else
15301 if (MEM_P (op0)
15302 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15303 || !push_operand (op0, mode))
15304 && MEM_P (op1))
15305 op1 = force_reg (mode, op1);
15307 if (push_operand (op0, mode)
15308 && ! general_no_elim_operand (op1, mode))
15309 op1 = copy_to_mode_reg (mode, op1);
15311 /* Force large constants in 64bit compilation into a register
15312 to get them CSEed. */
15313 if (can_create_pseudo_p ()
15314 && (mode == DImode) && TARGET_64BIT
15315 && immediate_operand (op1, mode)
15316 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15317 && !register_operand (op0, mode)
15318 && optimize)
15319 op1 = copy_to_mode_reg (mode, op1);
15321 if (can_create_pseudo_p ()
15322 && FLOAT_MODE_P (mode)
15323 && GET_CODE (op1) == CONST_DOUBLE)
15325 /* If we are loading a floating point constant to a register,
15326 force the value to memory now, since we'll get better code
15327 out the back end. */
15329 op1 = validize_mem (force_const_mem (mode, op1));
15330 if (!register_operand (op0, mode))
15332 rtx temp = gen_reg_rtx (mode);
15333 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15334 emit_move_insn (op0, temp);
15335 return;
15340 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15343 void
15344 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15346 rtx op0 = operands[0], op1 = operands[1];
15347 unsigned int align = GET_MODE_ALIGNMENT (mode);
15349 /* Force constants other than zero into memory. We do not know how
15350 the instructions used to build constants modify the upper 64 bits
15351 of the register; once we have that information we may be able
15352 to handle some of them more efficiently. */
15353 if (can_create_pseudo_p ()
15354 && register_operand (op0, mode)
15355 && (CONSTANT_P (op1)
15356 || (GET_CODE (op1) == SUBREG
15357 && CONSTANT_P (SUBREG_REG (op1))))
15358 && !standard_sse_constant_p (op1))
15359 op1 = validize_mem (force_const_mem (mode, op1));
15361 /* We need to check memory alignment for SSE mode since attributes
15362 can make operands unaligned. */
15363 if (can_create_pseudo_p ()
15364 && SSE_REG_MODE_P (mode)
15365 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15366 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15368 rtx tmp[2];
15370 /* ix86_expand_vector_move_misalign() does not like constants ... */
15371 if (CONSTANT_P (op1)
15372 || (GET_CODE (op1) == SUBREG
15373 && CONSTANT_P (SUBREG_REG (op1))))
15374 op1 = validize_mem (force_const_mem (mode, op1));
15376 /* ... nor both arguments in memory. */
15377 if (!register_operand (op0, mode)
15378 && !register_operand (op1, mode))
15379 op1 = force_reg (mode, op1);
15381 tmp[0] = op0; tmp[1] = op1;
15382 ix86_expand_vector_move_misalign (mode, tmp);
15383 return;
15386 /* Make operand1 a register if it isn't already. */
15387 if (can_create_pseudo_p ()
15388 && !register_operand (op0, mode)
15389 && !register_operand (op1, mode))
15391 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15392 return;
15395 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15398 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15399 straight to ix86_expand_vector_move. */
15400 /* Code generation for scalar reg-reg moves of single and double precision data:
15401 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15402 movaps reg, reg
15403 else
15404 movss reg, reg
15405 if (x86_sse_partial_reg_dependency == true)
15406 movapd reg, reg
15407 else
15408 movsd reg, reg
15410 Code generation for scalar loads of double precision data:
15411 if (x86_sse_split_regs == true)
15412 movlpd mem, reg (gas syntax)
15413 else
15414 movsd mem, reg
15416 Code generation for unaligned packed loads of single precision data
15417 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15418 if (x86_sse_unaligned_move_optimal)
15419 movups mem, reg
15421 if (x86_sse_partial_reg_dependency == true)
15423 xorps reg, reg
15424 movlps mem, reg
15425 movhps mem+8, reg
15427 else
15429 movlps mem, reg
15430 movhps mem+8, reg
15433 Code generation for unaligned packed loads of double precision data
15434 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15435 if (x86_sse_unaligned_move_optimal)
15436 movupd mem, reg
15438 if (x86_sse_split_regs == true)
15440 movlpd mem, reg
15441 movhpd mem+8, reg
15443 else
15445 movsd mem, reg
15446 movhpd mem+8, reg
15450 void
15451 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15453 rtx op0, op1, m;
15455 op0 = operands[0];
15456 op1 = operands[1];
15458 if (TARGET_AVX)
15460 switch (GET_MODE_CLASS (mode))
15462 case MODE_VECTOR_INT:
15463 case MODE_INT:
15464 switch (GET_MODE_SIZE (mode))
15466 case 16:
15467 /* If we're optimizing for size, movups is the smallest. */
15468 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15470 op0 = gen_lowpart (V4SFmode, op0);
15471 op1 = gen_lowpart (V4SFmode, op1);
15472 emit_insn (gen_avx_movups (op0, op1));
15473 return;
15475 op0 = gen_lowpart (V16QImode, op0);
15476 op1 = gen_lowpart (V16QImode, op1);
15477 emit_insn (gen_avx_movdqu (op0, op1));
15478 break;
15479 case 32:
15480 op0 = gen_lowpart (V32QImode, op0);
15481 op1 = gen_lowpart (V32QImode, op1);
15482 emit_insn (gen_avx_movdqu256 (op0, op1));
15483 break;
15484 default:
15485 gcc_unreachable ();
15487 break;
15488 case MODE_VECTOR_FLOAT:
15489 op0 = gen_lowpart (mode, op0);
15490 op1 = gen_lowpart (mode, op1);
15492 switch (mode)
15494 case V4SFmode:
15495 emit_insn (gen_avx_movups (op0, op1));
15496 break;
15497 case V8SFmode:
15498 emit_insn (gen_avx_movups256 (op0, op1));
15499 break;
15500 case V2DFmode:
15501 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15503 op0 = gen_lowpart (V4SFmode, op0);
15504 op1 = gen_lowpart (V4SFmode, op1);
15505 emit_insn (gen_avx_movups (op0, op1));
15506 return;
15508 emit_insn (gen_avx_movupd (op0, op1));
15509 break;
15510 case V4DFmode:
15511 emit_insn (gen_avx_movupd256 (op0, op1));
15512 break;
15513 default:
15514 gcc_unreachable ();
15516 break;
15518 default:
15519 gcc_unreachable ();
15522 return;
15525 if (MEM_P (op1))
15527 /* If we're optimizing for size, movups is the smallest. */
15528 if (optimize_insn_for_size_p ()
15529 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15531 op0 = gen_lowpart (V4SFmode, op0);
15532 op1 = gen_lowpart (V4SFmode, op1);
15533 emit_insn (gen_sse_movups (op0, op1));
15534 return;
15537 /* ??? If we have typed data, then it would appear that using
15538 movdqu is the only way to get unaligned data loaded with
15539 integer type. */
15540 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15542 op0 = gen_lowpart (V16QImode, op0);
15543 op1 = gen_lowpart (V16QImode, op1);
15544 emit_insn (gen_sse2_movdqu (op0, op1));
15545 return;
15548 if (TARGET_SSE2 && mode == V2DFmode)
15550 rtx zero;
15552 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15554 op0 = gen_lowpart (V2DFmode, op0);
15555 op1 = gen_lowpart (V2DFmode, op1);
15556 emit_insn (gen_sse2_movupd (op0, op1));
15557 return;
15560 /* When SSE registers are split into halves, we can avoid
15561 writing to the top half twice. */
15562 if (TARGET_SSE_SPLIT_REGS)
15564 emit_clobber (op0);
15565 zero = op0;
15567 else
15569 /* ??? Not sure about the best option for the Intel chips.
15570 The following would seem to satisfy; the register is
15571 entirely cleared, breaking the dependency chain. We
15572 then store to the upper half, with a dependency depth
15573 of one. A rumor has it that Intel recommends two movsd
15574 followed by an unpacklpd, but this is unconfirmed. And
15575 given that the dependency depth of the unpacklpd would
15576 still be one, I'm not sure why this would be better. */
15577 zero = CONST0_RTX (V2DFmode);
15580 m = adjust_address (op1, DFmode, 0);
15581 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15582 m = adjust_address (op1, DFmode, 8);
15583 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15585 else
15587 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15589 op0 = gen_lowpart (V4SFmode, op0);
15590 op1 = gen_lowpart (V4SFmode, op1);
15591 emit_insn (gen_sse_movups (op0, op1));
15592 return;
15595 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15596 emit_move_insn (op0, CONST0_RTX (mode));
15597 else
15598 emit_clobber (op0);
15600 if (mode != V4SFmode)
15601 op0 = gen_lowpart (V4SFmode, op0);
15602 m = adjust_address (op1, V2SFmode, 0);
15603 emit_insn (gen_sse_loadlps (op0, op0, m));
15604 m = adjust_address (op1, V2SFmode, 8);
15605 emit_insn (gen_sse_loadhps (op0, op0, m));
15608 else if (MEM_P (op0))
15610 /* If we're optimizing for size, movups is the smallest. */
15611 if (optimize_insn_for_size_p ()
15612 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15614 op0 = gen_lowpart (V4SFmode, op0);
15615 op1 = gen_lowpart (V4SFmode, op1);
15616 emit_insn (gen_sse_movups (op0, op1));
15617 return;
15620 /* ??? Similar to above, only less clear because of quote
15621 typeless stores unquote. */
15622 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15623 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15625 op0 = gen_lowpart (V16QImode, op0);
15626 op1 = gen_lowpart (V16QImode, op1);
15627 emit_insn (gen_sse2_movdqu (op0, op1));
15628 return;
15631 if (TARGET_SSE2 && mode == V2DFmode)
15633 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15635 op0 = gen_lowpart (V2DFmode, op0);
15636 op1 = gen_lowpart (V2DFmode, op1);
15637 emit_insn (gen_sse2_movupd (op0, op1));
15639 else
15641 m = adjust_address (op0, DFmode, 0);
15642 emit_insn (gen_sse2_storelpd (m, op1));
15643 m = adjust_address (op0, DFmode, 8);
15644 emit_insn (gen_sse2_storehpd (m, op1));
15647 else
15649 if (mode != V4SFmode)
15650 op1 = gen_lowpart (V4SFmode, op1);
15652 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15654 op0 = gen_lowpart (V4SFmode, op0);
15655 emit_insn (gen_sse_movups (op0, op1));
15657 else
15659 m = adjust_address (op0, V2SFmode, 0);
15660 emit_insn (gen_sse_storelps (m, op1));
15661 m = adjust_address (op0, V2SFmode, 8);
15662 emit_insn (gen_sse_storehps (m, op1));
15666 else
15667 gcc_unreachable ();
15670 /* Expand a push in MODE. This is some mode for which we do not support
15671 proper push instructions, at least from the registers that we expect
15672 the value to live in. */
15674 void
15675 ix86_expand_push (enum machine_mode mode, rtx x)
15677 rtx tmp;
15679 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15680 GEN_INT (-GET_MODE_SIZE (mode)),
15681 stack_pointer_rtx, 1, OPTAB_DIRECT);
15682 if (tmp != stack_pointer_rtx)
15683 emit_move_insn (stack_pointer_rtx, tmp);
15685 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15687 /* When we push an operand onto the stack, it has to be aligned at least
15688 at the function argument boundary.  However, since we don't have
15689 the argument type, we can't determine the actual argument
15690 boundary. */
15691 emit_move_insn (tmp, x);
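/* Roughly, the expansion above amounts to a pointer-sized subtraction
   from the stack pointer followed by an ordinary MODE-sized store to
   the new top of stack, e.g. "sub $N, %esp" plus a move into "(%esp)",
   with N = GET_MODE_SIZE (MODE).  */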
15694 /* Helper function of ix86_fixup_binary_operands to canonicalize
15695 operand order. Returns true if the operands should be swapped. */
15697 static bool
15698 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15699 rtx operands[])
15701 rtx dst = operands[0];
15702 rtx src1 = operands[1];
15703 rtx src2 = operands[2];
15705 /* If the operation is not commutative, we can't do anything. */
15706 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15707 return false;
15709 /* Highest priority is that src1 should match dst. */
15710 if (rtx_equal_p (dst, src1))
15711 return false;
15712 if (rtx_equal_p (dst, src2))
15713 return true;
15715 /* Next highest priority is that immediate constants come second. */
15716 if (immediate_operand (src2, mode))
15717 return false;
15718 if (immediate_operand (src1, mode))
15719 return true;
15721 /* Lowest priority is that memory references should come second. */
15722 if (MEM_P (src2))
15723 return false;
15724 if (MEM_P (src1))
15725 return true;
15727 return false;
15731 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15732 destination to use for the operation. If different from the true
15733 destination in operands[0], a copy operation will be required. */
15736 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15737 rtx operands[])
15739 rtx dst = operands[0];
15740 rtx src1 = operands[1];
15741 rtx src2 = operands[2];
15743 /* Canonicalize operand order. */
15744 if (ix86_swap_binary_operands_p (code, mode, operands))
15746 rtx temp;
15748 /* It is invalid to swap operands of different modes. */
15749 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15751 temp = src1;
15752 src1 = src2;
15753 src2 = temp;
15756 /* Both source operands cannot be in memory. */
15757 if (MEM_P (src1) && MEM_P (src2))
15759 /* Optimization: Only read from memory once. */
15760 if (rtx_equal_p (src1, src2))
15762 src2 = force_reg (mode, src2);
15763 src1 = src2;
15765 else
15766 src2 = force_reg (mode, src2);
15769 /* If the destination is memory, and we do not have matching source
15770 operands, do things in registers. */
15771 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15772 dst = gen_reg_rtx (mode);
15774 /* Source 1 cannot be a constant. */
15775 if (CONSTANT_P (src1))
15776 src1 = force_reg (mode, src1);
15778 /* Source 1 cannot be a non-matching memory. */
15779 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15780 src1 = force_reg (mode, src1);
15782 operands[1] = src1;
15783 operands[2] = src2;
15784 return dst;
15787 /* Similarly, but assume that the destination has already been
15788 set up properly. */
15790 void
15791 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15792 enum machine_mode mode, rtx operands[])
15794 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15795 gcc_assert (dst == operands[0]);
15798 /* Attempt to expand a binary operator. Make the expansion closer to the
15799 actual machine, than just general_operand, which would allow 3 separate
15800 memory references (one output, two inputs) in a single insn. */
15802 void
15803 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15804 rtx operands[])
15806 rtx src1, src2, dst, op, clob;
15808 dst = ix86_fixup_binary_operands (code, mode, operands);
15809 src1 = operands[1];
15810 src2 = operands[2];
15812 /* Emit the instruction. */
15814 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15815 if (reload_in_progress)
15817 /* Reload doesn't know about the flags register, and doesn't know that
15818 it doesn't want to clobber it. We can only do this with PLUS. */
15819 gcc_assert (code == PLUS);
15820 emit_insn (op);
15822 else if (reload_completed
15823 && code == PLUS
15824 && !rtx_equal_p (dst, src1))
15826 /* This is going to be an LEA; avoid splitting it later. */
15827 emit_insn (op);
15829 else
15831 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15832 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15835 /* Fix up the destination if needed. */
15836 if (dst != operands[0])
15837 emit_move_insn (operands[0], dst);
15840 /* Return TRUE or FALSE depending on whether the binary operator meets the
15841 appropriate constraints. */
15843 bool
15844 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15845 rtx operands[3])
15847 rtx dst = operands[0];
15848 rtx src1 = operands[1];
15849 rtx src2 = operands[2];
15851 /* Both source operands cannot be in memory. */
15852 if (MEM_P (src1) && MEM_P (src2))
15853 return false;
15855 /* Canonicalize operand order for commutative operators. */
15856 if (ix86_swap_binary_operands_p (code, mode, operands))
15858 rtx temp = src1;
15859 src1 = src2;
15860 src2 = temp;
15863 /* If the destination is memory, we must have a matching source operand. */
15864 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15865 return false;
15867 /* Source 1 cannot be a constant. */
15868 if (CONSTANT_P (src1))
15869 return false;
15871 /* Source 1 cannot be a non-matching memory. */
15872 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15874 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15875 return (code == AND
15876 && (mode == HImode
15877 || mode == SImode
15878 || (TARGET_64BIT && mode == DImode))
15879 && CONST_INT_P (src2)
15880 && (INTVAL (src2) == 0xff
15881 || INTVAL (src2) == 0xffff));
15884 return true;
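/* As an illustration of the zero-extend exception above: "reg = mem & 0xff"
   does not satisfy the usual two-address constraint (the memory source
   does not match the destination), but it is still accepted because it
   can be emitted as a single movz{b,w}l-style zero-extending load.  */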
15887 /* Attempt to expand a unary operator. Make the expansion closer to the
15888 actual machine, than just general_operand, which would allow 2 separate
15889 memory references (one output, one input) in a single insn. */
15891 void
15892 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15893 rtx operands[])
15895 int matching_memory;
15896 rtx src, dst, op, clob;
15898 dst = operands[0];
15899 src = operands[1];
15901 /* If the destination is memory, and we do not have matching source
15902 operands, do things in registers. */
15903 matching_memory = 0;
15904 if (MEM_P (dst))
15906 if (rtx_equal_p (dst, src))
15907 matching_memory = 1;
15908 else
15909 dst = gen_reg_rtx (mode);
15912 /* When source operand is memory, destination must match. */
15913 if (MEM_P (src) && !matching_memory)
15914 src = force_reg (mode, src);
15916 /* Emit the instruction. */
15918 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15919 if (reload_in_progress || code == NOT)
15921 /* Reload doesn't know about the flags register, and doesn't know that
15922 it doesn't want to clobber it. */
15923 gcc_assert (code == NOT);
15924 emit_insn (op);
15926 else
15928 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15929 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15932 /* Fix up the destination if needed. */
15933 if (dst != operands[0])
15934 emit_move_insn (operands[0], dst);
15937 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15938 divisor are within the range [0-255]. */
15940 void
15941 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15942 bool signed_p)
15944 rtx end_label, qimode_label;
15945 rtx insn, div, mod;
15946 rtx scratch, tmp0, tmp1, tmp2;
15947 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15948 rtx (*gen_zero_extend) (rtx, rtx);
15949 rtx (*gen_test_ccno_1) (rtx, rtx);
15951 switch (mode)
15953 case SImode:
15954 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15955 gen_test_ccno_1 = gen_testsi_ccno_1;
15956 gen_zero_extend = gen_zero_extendqisi2;
15957 break;
15958 case DImode:
15959 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15960 gen_test_ccno_1 = gen_testdi_ccno_1;
15961 gen_zero_extend = gen_zero_extendqidi2;
15962 break;
15963 default:
15964 gcc_unreachable ();
15967 end_label = gen_label_rtx ();
15968 qimode_label = gen_label_rtx ();
15970 scratch = gen_reg_rtx (mode);
15972 /* Use 8bit unsigned divmod if dividend and divisor are within
15973 the range [0-255]. */
15974 emit_move_insn (scratch, operands[2]);
15975 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15976 scratch, 1, OPTAB_DIRECT);
15977 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15978 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15979 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15980 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15981 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15982 pc_rtx);
15983 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15984 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15985 JUMP_LABEL (insn) = qimode_label;
15987 /* Generate original signed/unsigned divmod. */
15988 div = gen_divmod4_1 (operands[0], operands[1],
15989 operands[2], operands[3]);
15990 emit_insn (div);
15992 /* Branch to the end. */
15993 emit_jump_insn (gen_jump (end_label));
15994 emit_barrier ();
15996 /* Generate 8bit unsigned divide. */
15997 emit_label (qimode_label);
15998 /* Don't use operands[0] for result of 8bit divide since not all
15999 registers support QImode ZERO_EXTRACT. */
16000 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16001 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16002 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16003 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16005 if (signed_p)
16007 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16008 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16010 else
16012 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16013 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16016 /* Extract remainder from AH. */
16017 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16018 if (REG_P (operands[1]))
16019 insn = emit_move_insn (operands[1], tmp1);
16020 else
16022 /* Need a new scratch register since the old one has result
16023 of 8bit divide. */
16024 scratch = gen_reg_rtx (mode);
16025 emit_move_insn (scratch, tmp1);
16026 insn = emit_move_insn (operands[1], scratch);
16028 set_unique_reg_note (insn, REG_EQUAL, mod);
16030 /* Zero extend quotient from AL. */
16031 tmp1 = gen_lowpart (QImode, tmp0);
16032 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16033 set_unique_reg_note (insn, REG_EQUAL, div);
16035 emit_label (end_label);
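/* A minimal standalone C sketch (not GCC internals) of the split above,
   shown for the unsigned case; sketch_split_divmod is a hypothetical name.  */
static unsigned int
sketch_split_divmod (unsigned int a, unsigned int b, unsigned int *rem)
{
  if ((a | b) < 0x100)
    {
      /* Both operands fit in 8 bits, so one narrow DIV yields
	 AL = quotient and AH = remainder.  */
      *rem = (unsigned char) a % (unsigned char) b;
      return (unsigned char) a / (unsigned char) b;
    }
  *rem = a % b;
  return a / b;
}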
16038 #define LEA_SEARCH_THRESHOLD 12
16040 /* Search backward for non-agu definition of register number REGNO1
16041 or register number REGNO2 in INSN's basic block until
16042 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16043 2. Reach BB boundary, or
16044 3. Reach agu definition.
16045 Returns the distance between the non-agu definition point and INSN.
16046 If no definition point, returns -1. */
16048 static int
16049 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16050 rtx insn)
16052 basic_block bb = BLOCK_FOR_INSN (insn);
16053 int distance = 0;
16054 df_ref *def_rec;
16055 enum attr_type insn_type;
16057 if (insn != BB_HEAD (bb))
16059 rtx prev = PREV_INSN (insn);
16060 while (prev && distance < LEA_SEARCH_THRESHOLD)
16062 if (NONDEBUG_INSN_P (prev))
16064 distance++;
16065 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16066 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16067 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16068 && (regno1 == DF_REF_REGNO (*def_rec)
16069 || regno2 == DF_REF_REGNO (*def_rec)))
16071 insn_type = get_attr_type (prev);
16072 if (insn_type != TYPE_LEA)
16073 goto done;
16076 if (prev == BB_HEAD (bb))
16077 break;
16078 prev = PREV_INSN (prev);
16082 if (distance < LEA_SEARCH_THRESHOLD)
16084 edge e;
16085 edge_iterator ei;
16086 bool simple_loop = false;
16088 FOR_EACH_EDGE (e, ei, bb->preds)
16089 if (e->src == bb)
16091 simple_loop = true;
16092 break;
16095 if (simple_loop)
16097 rtx prev = BB_END (bb);
16098 while (prev
16099 && prev != insn
16100 && distance < LEA_SEARCH_THRESHOLD)
16102 if (NONDEBUG_INSN_P (prev))
16104 distance++;
16105 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16106 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16107 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16108 && (regno1 == DF_REF_REGNO (*def_rec)
16109 || regno2 == DF_REF_REGNO (*def_rec)))
16111 insn_type = get_attr_type (prev);
16112 if (insn_type != TYPE_LEA)
16113 goto done;
16116 prev = PREV_INSN (prev);
16121 distance = -1;
16123 done:
16124 /* get_attr_type may modify recog data. We want to make sure
16125 that recog data is valid for instruction INSN, on which
16126 distance_non_agu_define is called. INSN is unchanged here. */
16127 extract_insn_cached (insn);
16128 return distance;
16131 /* Return the distance between INSN and the next insn that uses
16132 register number REGNO0 in a memory address.  Return -1 if no such
16133 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16135 static int
16136 distance_agu_use (unsigned int regno0, rtx insn)
16138 basic_block bb = BLOCK_FOR_INSN (insn);
16139 int distance = 0;
16140 df_ref *def_rec;
16141 df_ref *use_rec;
16143 if (insn != BB_END (bb))
16145 rtx next = NEXT_INSN (insn);
16146 while (next && distance < LEA_SEARCH_THRESHOLD)
16148 if (NONDEBUG_INSN_P (next))
16150 distance++;
16152 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16153 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16154 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16155 && regno0 == DF_REF_REGNO (*use_rec))
16157 /* Return DISTANCE if OP0 is used in memory
16158 address in NEXT. */
16159 return distance;
16162 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16163 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16164 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16165 && regno0 == DF_REF_REGNO (*def_rec))
16167 /* Return -1 if OP0 is set in NEXT. */
16168 return -1;
16171 if (next == BB_END (bb))
16172 break;
16173 next = NEXT_INSN (next);
16177 if (distance < LEA_SEARCH_THRESHOLD)
16179 edge e;
16180 edge_iterator ei;
16181 bool simple_loop = false;
16183 FOR_EACH_EDGE (e, ei, bb->succs)
16184 if (e->dest == bb)
16186 simple_loop = true;
16187 break;
16190 if (simple_loop)
16192 rtx next = BB_HEAD (bb);
16193 while (next
16194 && next != insn
16195 && distance < LEA_SEARCH_THRESHOLD)
16197 if (NONDEBUG_INSN_P (next))
16199 distance++;
16201 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16202 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16203 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16204 && regno0 == DF_REF_REGNO (*use_rec))
16206 /* Return DISTANCE if OP0 is used in memory
16207 address in NEXT. */
16208 return distance;
16211 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16212 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16213 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16214 && regno0 == DF_REF_REGNO (*def_rec))
16216 /* Return -1 if OP0 is set in NEXT. */
16217 return -1;
16221 next = NEXT_INSN (next);
16226 return -1;
16229 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
16230 there is a dilemma between choosing LEA or ADD:
16231 Negative value: ADD is preferred over LEA
16232 Zero: Neutral
16233 Positive value: LEA is preferred over ADD. */
16234 #define IX86_LEA_PRIORITY 2
16236 /* Return true if it is ok to optimize an ADD operation to LEA
16237 operation to avoid flag register consumption.  For most processors,
16238 ADD is faster than LEA.  For processors like ATOM, if the
16239 destination register of the LEA holds an actual address which will be
16240 used soon, LEA is better; otherwise ADD is better. */
16242 bool
16243 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16245 unsigned int regno0 = true_regnum (operands[0]);
16246 unsigned int regno1 = true_regnum (operands[1]);
16247 unsigned int regno2 = true_regnum (operands[2]);
16249 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16250 if (regno0 != regno1 && regno0 != regno2)
16251 return true;
16253 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16254 return false;
16255 else
16257 int dist_define, dist_use;
16259 /* Return false if REGNO0 isn't used in memory address. */
16260 dist_use = distance_agu_use (regno0, insn);
16261 if (dist_use <= 0)
16262 return false;
16264 dist_define = distance_non_agu_define (regno1, regno2, insn);
16265 if (dist_define <= 0)
16266 return true;
16268 /* If this insn has both backward non-agu dependence and forward
16269 agu dependence, the one with the shorter distance takes effect. */
16270 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
16271 return false;
16273 return true;
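/* Illustrative example of the heuristic: on Atom, if "a = b + c" is
   followed a few instructions later by a memory access such as
   "mov (%a), %d", emitting "lea (%b,%c), %a" keeps the computation on
   the address-generation unit and is meant to avoid an ALU-to-AGU
   forwarding stall; if the result never feeds an address, a plain
   "add" is preferred.  */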
16277 /* Return true if destination reg of SET_BODY is shift count of
16278 USE_BODY. */
16280 static bool
16281 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16283 rtx set_dest;
16284 rtx shift_rtx;
16285 int i;
16287 /* Retrieve destination of SET_BODY. */
16288 switch (GET_CODE (set_body))
16290 case SET:
16291 set_dest = SET_DEST (set_body);
16292 if (!set_dest || !REG_P (set_dest))
16293 return false;
16294 break;
16295 case PARALLEL:
16296 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16297 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16298 use_body))
16299 return true;
16300 default:
16301 return false;
16302 break;
16305 /* Retrieve shift count of USE_BODY. */
16306 switch (GET_CODE (use_body))
16308 case SET:
16309 shift_rtx = XEXP (use_body, 1);
16310 break;
16311 case PARALLEL:
16312 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16313 if (ix86_dep_by_shift_count_body (set_body,
16314 XVECEXP (use_body, 0, i)))
16315 return true;
16316 default:
16317 return false;
16318 break;
16321 if (shift_rtx
16322 && (GET_CODE (shift_rtx) == ASHIFT
16323 || GET_CODE (shift_rtx) == LSHIFTRT
16324 || GET_CODE (shift_rtx) == ASHIFTRT
16325 || GET_CODE (shift_rtx) == ROTATE
16326 || GET_CODE (shift_rtx) == ROTATERT))
16328 rtx shift_count = XEXP (shift_rtx, 1);
16330 /* Return true if shift count is dest of SET_BODY. */
16331 if (REG_P (shift_count)
16332 && true_regnum (set_dest) == true_regnum (shift_count))
16333 return true;
16336 return false;
16339 /* Return true if destination reg of SET_INSN is shift count of
16340 USE_INSN. */
16342 bool
16343 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16345 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16346 PATTERN (use_insn));
16349 /* Return TRUE or FALSE depending on whether the unary operator meets the
16350 appropriate constraints. */
16352 bool
16353 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16354 enum machine_mode mode ATTRIBUTE_UNUSED,
16355 rtx operands[2] ATTRIBUTE_UNUSED)
16357 /* If one of operands is memory, source and destination must match. */
16358 if ((MEM_P (operands[0])
16359 || MEM_P (operands[1]))
16360 && ! rtx_equal_p (operands[0], operands[1]))
16361 return false;
16362 return true;
16365 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16366 are ok, keeping in mind the possible movddup alternative. */
16368 bool
16369 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16371 if (MEM_P (operands[0]))
16372 return rtx_equal_p (operands[0], operands[1 + high]);
16373 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16374 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16375 return true;
16378 /* Post-reload splitter for converting an SF or DFmode value in an
16379 SSE register into an unsigned SImode. */
16381 void
16382 ix86_split_convert_uns_si_sse (rtx operands[])
16384 enum machine_mode vecmode;
16385 rtx value, large, zero_or_two31, input, two31, x;
16387 large = operands[1];
16388 zero_or_two31 = operands[2];
16389 input = operands[3];
16390 two31 = operands[4];
16391 vecmode = GET_MODE (large);
16392 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16394 /* Load up the value into the low element. We must ensure that the other
16395 elements are valid floats -- zero is the easiest such value. */
16396 if (MEM_P (input))
16398 if (vecmode == V4SFmode)
16399 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16400 else
16401 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16403 else
16405 input = gen_rtx_REG (vecmode, REGNO (input));
16406 emit_move_insn (value, CONST0_RTX (vecmode));
16407 if (vecmode == V4SFmode)
16408 emit_insn (gen_sse_movss (value, value, input));
16409 else
16410 emit_insn (gen_sse2_movsd (value, value, input));
16413 emit_move_insn (large, two31);
16414 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16416 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16417 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16419 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16420 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16422 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16423 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16425 large = gen_rtx_REG (V4SImode, REGNO (large));
16426 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16428 x = gen_rtx_REG (V4SImode, REGNO (value));
16429 if (vecmode == V4SFmode)
16430 emit_insn (gen_sse2_cvttps2dq (x, value));
16431 else
16432 emit_insn (gen_sse2_cvttpd2dq (x, value));
16433 value = x;
16435 emit_insn (gen_xorv4si3 (value, value, large));
16438 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16439 Expects the 64-bit DImode to be supplied in a pair of integral
16440 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16441 -mfpmath=sse, !optimize_size only. */
16443 void
16444 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16446 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16447 rtx int_xmm, fp_xmm;
16448 rtx biases, exponents;
16449 rtx x;
16451 int_xmm = gen_reg_rtx (V4SImode);
16452 if (TARGET_INTER_UNIT_MOVES)
16453 emit_insn (gen_movdi_to_sse (int_xmm, input));
16454 else if (TARGET_SSE_SPLIT_REGS)
16456 emit_clobber (int_xmm);
16457 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16459 else
16461 x = gen_reg_rtx (V2DImode);
16462 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16463 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16466 x = gen_rtx_CONST_VECTOR (V4SImode,
16467 gen_rtvec (4, GEN_INT (0x43300000UL),
16468 GEN_INT (0x45300000UL),
16469 const0_rtx, const0_rtx));
16470 exponents = validize_mem (force_const_mem (V4SImode, x));
16472 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16473 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16475 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16476 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16477 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16478 (0x1.0p84 + double(fp_value_hi_xmm)).
16479 Note these exponents differ by 32. */
16481 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16483 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16484 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16485 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16486 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16487 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16488 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16489 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16490 biases = validize_mem (force_const_mem (V2DFmode, biases));
16491 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16493 /* Add the upper and lower DFmode values together. */
16494 if (TARGET_SSE3)
16495 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16496 else
16498 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16499 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16500 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16503 ix86_expand_vector_extract (false, target, fp_xmm, 0);
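/* A minimal standalone C sketch (not GCC internals) of the exponent-bias
   trick used above; sketch_uns64_to_double is a hypothetical name and the
   union type-punning assumes the usual IEEE double layout.  */
static double
sketch_uns64_to_double (unsigned long long v)
{
  unsigned long long lo = v & 0xffffffffULL;
  unsigned long long hi = v >> 32;
  union { unsigned long long u; double d; } d_lo, d_hi;

  d_lo.u = (0x43300000ULL << 32) | lo;	/* 2^52 + lo, exactly.  */
  d_hi.u = (0x45300000ULL << 32) | hi;	/* 2^84 + hi * 2^32, exactly.  */

  /* Subtract the biases and add the two halves; only the final
     addition can round, matching the single addpd/haddpd above.  */
  return (d_hi.d - 0x1.0p84) + (d_lo.d - 0x1.0p52);
}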
16506 /* Not used, but eases macroization of patterns. */
16507 void
16508 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16509 rtx input ATTRIBUTE_UNUSED)
16511 gcc_unreachable ();
16514 /* Convert an unsigned SImode value into a DFmode. Only currently used
16515 for SSE, but applicable anywhere. */
16517 void
16518 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16520 REAL_VALUE_TYPE TWO31r;
16521 rtx x, fp;
16523 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16524 NULL, 1, OPTAB_DIRECT);
16526 fp = gen_reg_rtx (DFmode);
16527 emit_insn (gen_floatsidf2 (fp, x));
16529 real_ldexp (&TWO31r, &dconst1, 31);
16530 x = const_double_from_real_value (TWO31r, DFmode);
16532 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16533 if (x != target)
16534 emit_move_insn (target, x);
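/* A standalone C sketch (not GCC internals) of the signed-bias trick
   above; sketch_uns32_to_double is a hypothetical name, and the
   unsigned-to-int conversion assumes the usual two's-complement wrap.  */
static double
sketch_uns32_to_double (unsigned int u)
{
  int biased = (int) (u - 0x80000000u);	/* u - 2^31 as a signed value.  */
  return (double) biased + 2147483648.0;	/* Add 2^31 back; exact.  */
}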
16537 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16538 32-bit mode; otherwise we have a direct convert instruction. */
16540 void
16541 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16543 REAL_VALUE_TYPE TWO32r;
16544 rtx fp_lo, fp_hi, x;
16546 fp_lo = gen_reg_rtx (DFmode);
16547 fp_hi = gen_reg_rtx (DFmode);
16549 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16551 real_ldexp (&TWO32r, &dconst1, 32);
16552 x = const_double_from_real_value (TWO32r, DFmode);
16553 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16555 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16557 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16558 0, OPTAB_DIRECT);
16559 if (x != target)
16560 emit_move_insn (target, x);
16563 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16564 For x86_32, -mfpmath=sse, !optimize_size only. */
16565 void
16566 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16568 REAL_VALUE_TYPE ONE16r;
16569 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16571 real_ldexp (&ONE16r, &dconst1, 16);
16572 x = const_double_from_real_value (ONE16r, SFmode);
16573 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16574 NULL, 0, OPTAB_DIRECT);
16575 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16576 NULL, 0, OPTAB_DIRECT);
16577 fp_hi = gen_reg_rtx (SFmode);
16578 fp_lo = gen_reg_rtx (SFmode);
16579 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16580 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16581 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16582 0, OPTAB_DIRECT);
16583 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16584 0, OPTAB_DIRECT);
16585 if (!rtx_equal_p (target, fp_hi))
16586 emit_move_insn (target, fp_hi);
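/* A standalone C sketch (not GCC internals) of the 16-bit split used
   above; sketch_uns32_to_float is a hypothetical name.  */
static float
sketch_uns32_to_float (unsigned int u)
{
  float fp_lo = (float) (int) (u & 0xffff);	/* Low 16 bits, exact.  */
  float fp_hi = (float) (int) (u >> 16);	/* High 16 bits, exact.  */
  /* The multiply by 2^16 is exact; only the final addition rounds.  */
  return fp_hi * 65536.0f + fp_lo;
}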
16589 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16590 then replicate the value for all elements of the vector
16591 register. */
16594 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16596 rtvec v;
16597 switch (mode)
16599 case V4SImode:
16600 gcc_assert (vect);
16601 v = gen_rtvec (4, value, value, value, value);
16602 return gen_rtx_CONST_VECTOR (V4SImode, v);
16604 case V2DImode:
16605 gcc_assert (vect);
16606 v = gen_rtvec (2, value, value);
16607 return gen_rtx_CONST_VECTOR (V2DImode, v);
16609 case V8SFmode:
16610 if (vect)
16611 v = gen_rtvec (8, value, value, value, value,
16612 value, value, value, value);
16613 else
16614 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16615 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16616 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16617 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16618 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16620 case V4SFmode:
16621 if (vect)
16622 v = gen_rtvec (4, value, value, value, value);
16623 else
16624 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16625 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16626 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16628 case V4DFmode:
16629 if (vect)
16630 v = gen_rtvec (4, value, value, value, value);
16631 else
16632 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16633 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16634 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16636 case V2DFmode:
16637 if (vect)
16638 v = gen_rtvec (2, value, value);
16639 else
16640 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16641 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16643 default:
16644 gcc_unreachable ();
16648 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16649 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16650 for an SSE register. If VECT is true, then replicate the mask for
16651 all elements of the vector register. If INVERT is true, then create
16652 a mask excluding the sign bit. */
16655 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16657 enum machine_mode vec_mode, imode;
16658 HOST_WIDE_INT hi, lo;
16659 int shift = 63;
16660 rtx v;
16661 rtx mask;
16663 /* Find the sign bit, sign extended to 2*HWI. */
16664 switch (mode)
16666 case V4SImode:
16667 case V8SFmode:
16668 case V4SFmode:
16669 vec_mode = mode;
16670 mode = GET_MODE_INNER (mode);
16671 imode = SImode;
16672 lo = 0x80000000, hi = lo < 0;
16673 break;
16675 case V2DImode:
16676 case V4DFmode:
16677 case V2DFmode:
16678 vec_mode = mode;
16679 mode = GET_MODE_INNER (mode);
16680 imode = DImode;
16681 if (HOST_BITS_PER_WIDE_INT >= 64)
16682 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16683 else
16684 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16685 break;
16687 case TImode:
16688 case TFmode:
16689 vec_mode = VOIDmode;
16690 if (HOST_BITS_PER_WIDE_INT >= 64)
16692 imode = TImode;
16693 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16695 else
16697 rtvec vec;
16699 imode = DImode;
16700 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16702 if (invert)
16704 lo = ~lo, hi = ~hi;
16705 v = constm1_rtx;
16707 else
16708 v = const0_rtx;
16710 mask = immed_double_const (lo, hi, imode);
16712 vec = gen_rtvec (2, v, mask);
16713 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16714 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16716 return v;
16718 break;
16720 default:
16721 gcc_unreachable ();
16724 if (invert)
16725 lo = ~lo, hi = ~hi;
16727 /* Force this value into the low part of a fp vector constant. */
16728 mask = immed_double_const (lo, hi, imode);
16729 mask = gen_lowpart (mode, mask);
16731 if (vec_mode == VOIDmode)
16732 return force_reg (mode, mask);
16734 v = ix86_build_const_vector (vec_mode, vect, mask);
16735 return force_reg (vec_mode, v);
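/* For example, for V2DFmode the mask built here is
   { 0x8000000000000000, 0x8000000000000000 }, and with INVERT it is
   { 0x7fffffffffffffff, 0x7fffffffffffffff }; the SFmode variants use
   0x80000000 / 0x7fffffff per element.  */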
16738 /* Generate code for floating point ABS or NEG. */
16740 void
16741 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16742 rtx operands[])
16744 rtx mask, set, dst, src;
16745 bool use_sse = false;
16746 bool vector_mode = VECTOR_MODE_P (mode);
16747 enum machine_mode vmode = mode;
16749 if (vector_mode)
16750 use_sse = true;
16751 else if (mode == TFmode)
16752 use_sse = true;
16753 else if (TARGET_SSE_MATH)
16755 use_sse = SSE_FLOAT_MODE_P (mode);
16756 if (mode == SFmode)
16757 vmode = V4SFmode;
16758 else if (mode == DFmode)
16759 vmode = V2DFmode;
16762 /* NEG and ABS performed with SSE use bitwise mask operations.
16763 Create the appropriate mask now. */
16764 if (use_sse)
16765 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16766 else
16767 mask = NULL_RTX;
16769 dst = operands[0];
16770 src = operands[1];
16772 set = gen_rtx_fmt_e (code, mode, src);
16773 set = gen_rtx_SET (VOIDmode, dst, set);
16775 if (mask)
16777 rtx use, clob;
16778 rtvec par;
16780 use = gen_rtx_USE (VOIDmode, mask);
16781 if (vector_mode)
16782 par = gen_rtvec (2, set, use);
16783 else
16785 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16786 par = gen_rtvec (3, set, use, clob);
16788 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16790 else
16791 emit_insn (set);
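/* A minimal C sketch (not GCC internals) of the bitwise identity used
   here for the DFmode case: negation flips the sign bit (xorpd with the
   sign mask), abs clears it (andpd with the inverted mask).
   sketch_fp_neg is a hypothetical name.  */
static double
sketch_fp_neg (double x)
{
  union { double d; unsigned long long u; } v;
  v.d = x;
  v.u ^= 0x8000000000000000ULL;	/* Flip only the sign bit.  */
  return v.d;
}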
16794 /* Expand a copysign operation. Special case operand 0 being a constant. */
16796 void
16797 ix86_expand_copysign (rtx operands[])
16799 enum machine_mode mode, vmode;
16800 rtx dest, op0, op1, mask, nmask;
16802 dest = operands[0];
16803 op0 = operands[1];
16804 op1 = operands[2];
16806 mode = GET_MODE (dest);
16808 if (mode == SFmode)
16809 vmode = V4SFmode;
16810 else if (mode == DFmode)
16811 vmode = V2DFmode;
16812 else
16813 vmode = mode;
16815 if (GET_CODE (op0) == CONST_DOUBLE)
16817 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
16819 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
16820 op0 = simplify_unary_operation (ABS, mode, op0, mode);
16822 if (mode == SFmode || mode == DFmode)
16824 if (op0 == CONST0_RTX (mode))
16825 op0 = CONST0_RTX (vmode);
16826 else
16828 rtx v = ix86_build_const_vector (vmode, false, op0);
16830 op0 = force_reg (vmode, v);
16833 else if (op0 != CONST0_RTX (mode))
16834 op0 = force_reg (mode, op0);
16836 mask = ix86_build_signbit_mask (vmode, 0, 0);
16838 if (mode == SFmode)
16839 copysign_insn = gen_copysignsf3_const;
16840 else if (mode == DFmode)
16841 copysign_insn = gen_copysigndf3_const;
16842 else
16843 copysign_insn = gen_copysigntf3_const;
16845 emit_insn (copysign_insn (dest, op0, op1, mask));
16847 else
16849 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16851 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16852 mask = ix86_build_signbit_mask (vmode, 0, 0);
16854 if (mode == SFmode)
16855 copysign_insn = gen_copysignsf3_var;
16856 else if (mode == DFmode)
16857 copysign_insn = gen_copysigndf3_var;
16858 else
16859 copysign_insn = gen_copysigntf3_var;
16861 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
16865 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16866 be a constant, and so has already been expanded into a vector constant. */
16868 void
16869 ix86_split_copysign_const (rtx operands[])
16871 enum machine_mode mode, vmode;
16872 rtx dest, op0, mask, x;
16874 dest = operands[0];
16875 op0 = operands[1];
16876 mask = operands[3];
16878 mode = GET_MODE (dest);
16879 vmode = GET_MODE (mask);
16881 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16882 x = gen_rtx_AND (vmode, dest, mask);
16883 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16885 if (op0 != CONST0_RTX (vmode))
16887 x = gen_rtx_IOR (vmode, dest, op0);
16888 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16892 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16893 so we have to do two masks. */
16895 void
16896 ix86_split_copysign_var (rtx operands[])
16898 enum machine_mode mode, vmode;
16899 rtx dest, scratch, op0, op1, mask, nmask, x;
16901 dest = operands[0];
16902 scratch = operands[1];
16903 op0 = operands[2];
16904 op1 = operands[3];
16905 nmask = operands[4];
16906 mask = operands[5];
16908 mode = GET_MODE (dest);
16909 vmode = GET_MODE (mask);
16911 if (rtx_equal_p (op0, op1))
16913 /* Shouldn't happen often (it's useless, obviously), but when it does
16914 we'd generate incorrect code if we continue below. */
16915 emit_move_insn (dest, op0);
16916 return;
16919 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16921 gcc_assert (REGNO (op1) == REGNO (scratch));
16923 x = gen_rtx_AND (vmode, scratch, mask);
16924 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16926 dest = mask;
16927 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16928 x = gen_rtx_NOT (vmode, dest);
16929 x = gen_rtx_AND (vmode, x, op0);
16930 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16932 else
16934 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16936 x = gen_rtx_AND (vmode, scratch, mask);
16938 else /* alternative 2,4 */
16940 gcc_assert (REGNO (mask) == REGNO (scratch));
16941 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16942 x = gen_rtx_AND (vmode, scratch, op1);
16944 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16946 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16948 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16949 x = gen_rtx_AND (vmode, dest, nmask);
16951 else /* alternative 3,4 */
16953 gcc_assert (REGNO (nmask) == REGNO (dest));
16954 dest = nmask;
16955 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16956 x = gen_rtx_AND (vmode, dest, op0);
16958 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16961 x = gen_rtx_IOR (vmode, dest, scratch);
16962 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
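/* A minimal C sketch (not GCC internals) of what the two masks compute
   for DFmode: and-not keeps the magnitude bits of OP0, and keeps the
   sign bit of OP1, and or merges the two.  sketch_copysign is a
   hypothetical name.  */
static double
sketch_copysign (double mag, double sgn)
{
  union { double d; unsigned long long u; } m, s;
  m.d = mag;
  s.d = sgn;
  m.u = (m.u & 0x7fffffffffffffffULL)	/* Magnitude bits of MAG.  */
	| (s.u & 0x8000000000000000ULL);	/* Sign bit of SGN.  */
  return m.d;
}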
16965 /* Return TRUE or FALSE depending on whether the first SET in INSN
16966 has source and destination with matching CC modes, and that the
16967 CC mode is at least as constrained as REQ_MODE. */
16969 bool
16970 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16972 rtx set;
16973 enum machine_mode set_mode;
16975 set = PATTERN (insn);
16976 if (GET_CODE (set) == PARALLEL)
16977 set = XVECEXP (set, 0, 0);
16978 gcc_assert (GET_CODE (set) == SET);
16979 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16981 set_mode = GET_MODE (SET_DEST (set));
16982 switch (set_mode)
16984 case CCNOmode:
16985 if (req_mode != CCNOmode
16986 && (req_mode != CCmode
16987 || XEXP (SET_SRC (set), 1) != const0_rtx))
16988 return false;
16989 break;
16990 case CCmode:
16991 if (req_mode == CCGCmode)
16992 return false;
16993 /* FALLTHRU */
16994 case CCGCmode:
16995 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16996 return false;
16997 /* FALLTHRU */
16998 case CCGOCmode:
16999 if (req_mode == CCZmode)
17000 return false;
17001 /* FALLTHRU */
17002 case CCAmode:
17003 case CCCmode:
17004 case CCOmode:
17005 case CCSmode:
17006 case CCZmode:
17007 break;
17009 default:
17010 gcc_unreachable ();
17013 return GET_MODE (SET_SRC (set)) == set_mode;
17016 /* Generate insn patterns to do an integer compare of OPERANDS. */
17018 static rtx
17019 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17021 enum machine_mode cmpmode;
17022 rtx tmp, flags;
17024 cmpmode = SELECT_CC_MODE (code, op0, op1);
17025 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17027 /* This is very simple, but making the interface the same as in the
17028 FP case makes the rest of the code easier. */
17029 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17030 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17032 /* Return the test that should be put into the flags user, i.e.
17033 the bcc, scc, or cmov instruction. */
17034 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17037 /* Figure out whether to use ordered or unordered fp comparisons.
17038 Return the appropriate mode to use. */
17040 enum machine_mode
17041 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17043 /* ??? In order to make all comparisons reversible, we do all comparisons
17044 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17045 all forms of trapping and nontrapping comparisons, we can make inequality
17046 comparisons trapping again, since it results in better code when using
17047 FCOM based compares. */
17048 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17051 enum machine_mode
17052 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17054 enum machine_mode mode = GET_MODE (op0);
17056 if (SCALAR_FLOAT_MODE_P (mode))
17058 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17059 return ix86_fp_compare_mode (code);
17062 switch (code)
17064 /* Only zero flag is needed. */
17065 case EQ: /* ZF=0 */
17066 case NE: /* ZF!=0 */
17067 return CCZmode;
17068 /* Codes needing carry flag. */
17069 case GEU: /* CF=0 */
17070 case LTU: /* CF=1 */
17071 /* Detect overflow checks. They need just the carry flag. */
17072 if (GET_CODE (op0) == PLUS
17073 && rtx_equal_p (op1, XEXP (op0, 0)))
17074 return CCCmode;
17075 else
17076 return CCmode;
17077 case GTU: /* CF=0 & ZF=0 */
17078 case LEU: /* CF=1 | ZF=1 */
17079 /* Detect overflow checks. They need just the carry flag. */
17080 if (GET_CODE (op0) == MINUS
17081 && rtx_equal_p (op1, XEXP (op0, 0)))
17082 return CCCmode;
17083 else
17084 return CCmode;
17085 /* Codes possibly doable only with sign flag when
17086 comparing against zero. */
17087 case GE: /* SF=OF or SF=0 */
17088 case LT: /* SF<>OF or SF=1 */
17089 if (op1 == const0_rtx)
17090 return CCGOCmode;
17091 else
17092 /* For other cases Carry flag is not required. */
17093 return CCGCmode;
17094 /* Codes doable only with sign flag when comparing
17095 against zero, but we lack a jump instruction for it,
17096 so we need to use relational tests against overflow
17097 that thus needs to be zero. */
17098 case GT: /* ZF=0 & SF=OF */
17099 case LE: /* ZF=1 | SF<>OF */
17100 if (op1 == const0_rtx)
17101 return CCNOmode;
17102 else
17103 return CCGCmode;
17104 /* The strcmp pattern does (use flags), and combine may ask us for a
17105 proper mode. */
17106 case USE:
17107 return CCmode;
17108 default:
17109 gcc_unreachable ();
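/* A few examples of the mapping above: "x == 0" needs only ZF and gets
   CCZmode; the overflow idiom "a + b < a" needs only CF and gets
   CCCmode; "x < 0" gets CCGOCmode since only the sign flag matters.  */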
17113 /* Return the fixed registers used for condition codes. */
17115 static bool
17116 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17118 *p1 = FLAGS_REG;
17119 *p2 = FPSR_REG;
17120 return true;
17123 /* If two condition code modes are compatible, return a condition code
17124 mode which is compatible with both. Otherwise, return
17125 VOIDmode. */
17127 static enum machine_mode
17128 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17130 if (m1 == m2)
17131 return m1;
17133 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17134 return VOIDmode;
17136 if ((m1 == CCGCmode && m2 == CCGOCmode)
17137 || (m1 == CCGOCmode && m2 == CCGCmode))
17138 return CCGCmode;
17140 switch (m1)
17142 default:
17143 gcc_unreachable ();
17145 case CCmode:
17146 case CCGCmode:
17147 case CCGOCmode:
17148 case CCNOmode:
17149 case CCAmode:
17150 case CCCmode:
17151 case CCOmode:
17152 case CCSmode:
17153 case CCZmode:
17154 switch (m2)
17156 default:
17157 return VOIDmode;
17159 case CCmode:
17160 case CCGCmode:
17161 case CCGOCmode:
17162 case CCNOmode:
17163 case CCAmode:
17164 case CCCmode:
17165 case CCOmode:
17166 case CCSmode:
17167 case CCZmode:
17168 return CCmode;
17171 case CCFPmode:
17172 case CCFPUmode:
17173 /* These are only compatible with themselves, which we already
17174 checked above. */
17175 return VOIDmode;
17180 /* Return a comparison we can do that is equivalent to
17181 swap_condition (code), apart possibly from orderedness.
17182 But, never change orderedness if TARGET_IEEE_FP, returning
17183 UNKNOWN in that case if necessary. */
17185 static enum rtx_code
17186 ix86_fp_swap_condition (enum rtx_code code)
17188 switch (code)
17190 case GT: /* GTU - CF=0 & ZF=0 */
17191 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17192 case GE: /* GEU - CF=0 */
17193 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17194 case UNLT: /* LTU - CF=1 */
17195 return TARGET_IEEE_FP ? UNKNOWN : GT;
17196 case UNLE: /* LEU - CF=1 | ZF=1 */
17197 return TARGET_IEEE_FP ? UNKNOWN : GE;
17198 default:
17199 return swap_condition (code);
17203 /* Return cost of comparison CODE using the best strategy for performance.
17204 All following functions use the number of instructions as a cost metric.
17205 In the future this should be tweaked to compute bytes for optimize_size and
17206 take into account performance of various instructions on various CPUs. */
17208 static int
17209 ix86_fp_comparison_cost (enum rtx_code code)
17211 int arith_cost;
17213 /* The cost of code using bit-twiddling on %ah. */
17214 switch (code)
17216 case UNLE:
17217 case UNLT:
17218 case LTGT:
17219 case GT:
17220 case GE:
17221 case UNORDERED:
17222 case ORDERED:
17223 case UNEQ:
17224 arith_cost = 4;
17225 break;
17226 case LT:
17227 case NE:
17228 case EQ:
17229 case UNGE:
17230 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17231 break;
17232 case LE:
17233 case UNGT:
17234 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17235 break;
17236 default:
17237 gcc_unreachable ();
17240 switch (ix86_fp_comparison_strategy (code))
17242 case IX86_FPCMP_COMI:
17243 return arith_cost > 4 ? 3 : 2;
17244 case IX86_FPCMP_SAHF:
17245 return arith_cost > 4 ? 4 : 3;
17246 default:
17247 return arith_cost;
17251 /* Return strategy to use for floating-point. We assume that fcomi is always
17252 preferable where available, since that is also true when looking at size
17253 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17255 enum ix86_fpcmp_strategy
17256 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17258 /* Do fcomi/sahf based test when profitable. */
17260 if (TARGET_CMOVE)
17261 return IX86_FPCMP_COMI;
17263 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17264 return IX86_FPCMP_SAHF;
17266 return IX86_FPCMP_ARITH;
17269 /* Swap, force into registers, or otherwise massage the two operands
17270 to a fp comparison. The operands are updated in place; the new
17271 comparison code is returned. */
17273 static enum rtx_code
17274 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17276 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17277 rtx op0 = *pop0, op1 = *pop1;
17278 enum machine_mode op_mode = GET_MODE (op0);
17279 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17281 /* All of the unordered compare instructions only work on registers.
17282 The same is true of the fcomi compare instructions. The XFmode
17283 compare instructions require registers except when comparing
17284 against zero or when converting operand 1 from fixed point to
17285 floating point. */
17287 if (!is_sse
17288 && (fpcmp_mode == CCFPUmode
17289 || (op_mode == XFmode
17290 && ! (standard_80387_constant_p (op0) == 1
17291 || standard_80387_constant_p (op1) == 1)
17292 && GET_CODE (op1) != FLOAT)
17293 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17295 op0 = force_reg (op_mode, op0);
17296 op1 = force_reg (op_mode, op1);
17298 else
17300 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17301 things around if they appear profitable, otherwise force op0
17302 into a register. */
17304 if (standard_80387_constant_p (op0) == 0
17305 || (MEM_P (op0)
17306 && ! (standard_80387_constant_p (op1) == 0
17307 || MEM_P (op1))))
17309 enum rtx_code new_code = ix86_fp_swap_condition (code);
17310 if (new_code != UNKNOWN)
17312 rtx tmp;
17313 tmp = op0, op0 = op1, op1 = tmp;
17314 code = new_code;
17318 if (!REG_P (op0))
17319 op0 = force_reg (op_mode, op0);
17321 if (CONSTANT_P (op1))
17323 int tmp = standard_80387_constant_p (op1);
17324 if (tmp == 0)
17325 op1 = validize_mem (force_const_mem (op_mode, op1));
17326 else if (tmp == 1)
17328 if (TARGET_CMOVE)
17329 op1 = force_reg (op_mode, op1);
17331 else
17332 op1 = force_reg (op_mode, op1);
17336 /* Try to rearrange the comparison to make it cheaper. */
17337 if (ix86_fp_comparison_cost (code)
17338 > ix86_fp_comparison_cost (swap_condition (code))
17339 && (REG_P (op1) || can_create_pseudo_p ()))
17341 rtx tmp;
17342 tmp = op0, op0 = op1, op1 = tmp;
17343 code = swap_condition (code);
17344 if (!REG_P (op0))
17345 op0 = force_reg (op_mode, op0);
17348 *pop0 = op0;
17349 *pop1 = op1;
17350 return code;
17353 /* Convert comparison codes we use to represent FP comparison to integer
17354 code that will result in proper branch. Return UNKNOWN if no such code
17355 is available. */
17357 enum rtx_code
17358 ix86_fp_compare_code_to_integer (enum rtx_code code)
17360 switch (code)
17362 case GT:
17363 return GTU;
17364 case GE:
17365 return GEU;
17366 case ORDERED:
17367 case UNORDERED:
17368 return code;
17369 break;
17370 case UNEQ:
17371 return EQ;
17372 break;
17373 case UNLT:
17374 return LTU;
17375 break;
17376 case UNLE:
17377 return LEU;
17378 break;
17379 case LTGT:
17380 return NE;
17381 break;
17382 default:
17383 return UNKNOWN;
17387 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17389 static rtx
17390 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17392 enum machine_mode fpcmp_mode, intcmp_mode;
17393 rtx tmp, tmp2;
17395 fpcmp_mode = ix86_fp_compare_mode (code);
17396 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17398 /* Do fcomi/sahf based test when profitable. */
17399 switch (ix86_fp_comparison_strategy (code))
17401 case IX86_FPCMP_COMI:
17402 intcmp_mode = fpcmp_mode;
17403 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17404 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17405 tmp);
17406 emit_insn (tmp);
17407 break;
17409 case IX86_FPCMP_SAHF:
17410 intcmp_mode = fpcmp_mode;
17411 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17412 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17413 tmp);
17415 if (!scratch)
17416 scratch = gen_reg_rtx (HImode);
17417 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17418 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17419 break;
17421 case IX86_FPCMP_ARITH:
17422 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17423 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17424 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17425 if (!scratch)
17426 scratch = gen_reg_rtx (HImode);
17427 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17429 /* In the unordered case, we have to check C2 for NaN's, which
17430 doesn't happen to work out to anything nice combination-wise.
17431 So do some bit twiddling on the value we've got in AH to come
17432 up with an appropriate set of condition codes. */
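/* After fnstsw, AH holds the relevant FPU status word bits: C0 = 0x01,
   C2 = 0x04 and C3 = 0x40, so the 0x45 mask used below covers C3|C2|C0.
   An fcom-style compare leaves 000 for >, C0 for <, C3 for ==, and
   C3|C2|C0 for unordered. */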
17434 intcmp_mode = CCNOmode;
17435 switch (code)
17437 case GT:
17438 case UNGT:
17439 if (code == GT || !TARGET_IEEE_FP)
17441 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17442 code = EQ;
17444 else
17446 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17447 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17448 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17449 intcmp_mode = CCmode;
17450 code = GEU;
17452 break;
17453 case LT:
17454 case UNLT:
17455 if (code == LT && TARGET_IEEE_FP)
17457 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17458 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17459 intcmp_mode = CCmode;
17460 code = EQ;
17462 else
17464 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17465 code = NE;
17467 break;
17468 case GE:
17469 case UNGE:
17470 if (code == GE || !TARGET_IEEE_FP)
17472 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17473 code = EQ;
17475 else
17477 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17478 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17479 code = NE;
17481 break;
17482 case LE:
17483 case UNLE:
17484 if (code == LE && TARGET_IEEE_FP)
17486 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17487 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17488 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17489 intcmp_mode = CCmode;
17490 code = LTU;
17492 else
17494 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17495 code = NE;
17497 break;
17498 case EQ:
17499 case UNEQ:
17500 if (code == EQ && TARGET_IEEE_FP)
17502 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17503 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17504 intcmp_mode = CCmode;
17505 code = EQ;
17507 else
17509 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17510 code = NE;
17512 break;
17513 case NE:
17514 case LTGT:
17515 if (code == NE && TARGET_IEEE_FP)
17517 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17518 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17519 GEN_INT (0x40)));
17520 code = NE;
17522 else
17524 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17525 code = EQ;
17527 break;
17529 case UNORDERED:
17530 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17531 code = NE;
17532 break;
17533 case ORDERED:
17534 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17535 code = EQ;
17536 break;
17538 default:
17539 gcc_unreachable ();
17541 break;
17543 default:
17544 gcc_unreachable();
17547 /* Return the test that should be put into the flags user, i.e.
17548 the bcc, scc, or cmov instruction. */
17549 return gen_rtx_fmt_ee (code, VOIDmode,
17550 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17551 const0_rtx);
17554 static rtx
17555 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17557 rtx ret;
17559 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17560 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17562 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17564 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17565 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17567 else
17568 ret = ix86_expand_int_compare (code, op0, op1);
17570 return ret;
17573 void
17574 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17576 enum machine_mode mode = GET_MODE (op0);
17577 rtx tmp;
17579 switch (mode)
17581 case SFmode:
17582 case DFmode:
17583 case XFmode:
17584 case QImode:
17585 case HImode:
17586 case SImode:
17587 simple:
17588 tmp = ix86_expand_compare (code, op0, op1);
17589 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17590 gen_rtx_LABEL_REF (VOIDmode, label),
17591 pc_rtx);
17592 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17593 return;
17595 case DImode:
17596 if (TARGET_64BIT)
17597 goto simple;
17598 case TImode:
17599 /* Expand DImode branch into multiple compare+branch. */
17601 rtx lo[2], hi[2], label2;
17602 enum rtx_code code1, code2, code3;
17603 enum machine_mode submode;
17605 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17607 tmp = op0, op0 = op1, op1 = tmp;
17608 code = swap_condition (code);
17611 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17612 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17614 submode = mode == DImode ? SImode : DImode;
17616 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17617 avoid two branches. This costs one extra insn, so disable when
17618 optimizing for size. */
17620 if ((code == EQ || code == NE)
17621 && (!optimize_insn_for_size_p ()
17622 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17624 rtx xor0, xor1;
17626 xor1 = hi[0];
17627 if (hi[1] != const0_rtx)
17628 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17629 NULL_RTX, 0, OPTAB_WIDEN);
17631 xor0 = lo[0];
17632 if (lo[1] != const0_rtx)
17633 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17634 NULL_RTX, 0, OPTAB_WIDEN);
17636 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17637 NULL_RTX, 0, OPTAB_WIDEN);
17639 ix86_expand_branch (code, tmp, const0_rtx, label);
17640 return;
17643 /* Otherwise, if we are doing a less-than or greater-than-or-equal
17644 comparison, op1 is a constant and the low word is zero, then we can
17645 just examine the high word. Similarly for a low word of -1 and
17646 less-than-or-equal or greater-than. */
17648 if (CONST_INT_P (hi[1]))
17649 switch (code)
17651 case LT: case LTU: case GE: case GEU:
17652 if (lo[1] == const0_rtx)
17654 ix86_expand_branch (code, hi[0], hi[1], label);
17655 return;
17657 break;
17658 case LE: case LEU: case GT: case GTU:
17659 if (lo[1] == constm1_rtx)
17661 ix86_expand_branch (code, hi[0], hi[1], label);
17662 return;
17664 break;
17665 default:
17666 break;
17669 /* Otherwise, we need two or three jumps. */
17671 label2 = gen_label_rtx ();
17673 code1 = code;
17674 code2 = swap_condition (code);
17675 code3 = unsigned_condition (code);
17677 switch (code)
17679 case LT: case GT: case LTU: case GTU:
17680 break;
17682 case LE: code1 = LT; code2 = GT; break;
17683 case GE: code1 = GT; code2 = LT; break;
17684 case LEU: code1 = LTU; code2 = GTU; break;
17685 case GEU: code1 = GTU; code2 = LTU; break;
17687 case EQ: code1 = UNKNOWN; code2 = NE; break;
17688 case NE: code2 = UNKNOWN; break;
17690 default:
17691 gcc_unreachable ();
17695 * a < b =>
17696 * if (hi(a) < hi(b)) goto true;
17697 * if (hi(a) > hi(b)) goto false;
17698 * if (lo(a) < lo(b)) goto true;
17699 * false:
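/* For example, a signed DImode a <= b yields code1 = LT, code2 = GT
   and code3 = LEU:
     if (hi(a) < hi(b)) goto true;
     if (hi(a) > hi(b)) goto false;
     if (lo(a) <= lo(b))  (unsigned compare)  goto true;
     false:  */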
17702 if (code1 != UNKNOWN)
17703 ix86_expand_branch (code1, hi[0], hi[1], label);
17704 if (code2 != UNKNOWN)
17705 ix86_expand_branch (code2, hi[0], hi[1], label2);
17707 ix86_expand_branch (code3, lo[0], lo[1], label);
17709 if (code2 != UNKNOWN)
17710 emit_label (label2);
17711 return;
17714 default:
17715 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17716 goto simple;
17720 /* Split branch based on floating point condition. */
17721 void
17722 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17723 rtx target1, rtx target2, rtx tmp, rtx pushed)
17725 rtx condition;
17726 rtx i;
17728 if (target2 != pc_rtx)
17730 rtx tmp = target2;
17731 code = reverse_condition_maybe_unordered (code);
17732 target2 = target1;
17733 target1 = tmp;
17736 condition = ix86_expand_fp_compare (code, op1, op2,
17737 tmp);
17739 /* Remove pushed operand from stack. */
17740 if (pushed)
17741 ix86_free_from_memory (GET_MODE (pushed));
17743 i = emit_jump_insn (gen_rtx_SET
17744 (VOIDmode, pc_rtx,
17745 gen_rtx_IF_THEN_ELSE (VOIDmode,
17746 condition, target1, target2)));
17747 if (split_branch_probability >= 0)
17748 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17751 void
17752 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17754 rtx ret;
17756 gcc_assert (GET_MODE (dest) == QImode);
17758 ret = ix86_expand_compare (code, op0, op1);
17759 PUT_MODE (ret, QImode);
17760 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
17763 /* Expand a comparison setting or clearing the carry flag. Return true
17764 when successful, and set *POP to the resulting comparison operation. */
17765 static bool
17766 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17768 enum machine_mode mode =
17769 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
17771 /* Do not handle double-mode compares that go through a special path. */
17772 if (mode == (TARGET_64BIT ? TImode : DImode))
17773 return false;
17775 if (SCALAR_FLOAT_MODE_P (mode))
17777 rtx compare_op, compare_seq;
17779 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17781 /* Shortcut: the following common codes never translate
17782 into carry-flag compares. */
17783 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17784 || code == ORDERED || code == UNORDERED)
17785 return false;
17787 /* These comparisons require the zero flag; swap the operands so they don't. */
17788 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17789 && !TARGET_IEEE_FP)
17791 rtx tmp = op0;
17792 op0 = op1;
17793 op1 = tmp;
17794 code = swap_condition (code);
17797 /* Try to expand the comparison and verify that we end up with
17798 a carry-flag-based comparison. This fails only when we decide
17799 to expand the comparison using arithmetic, which is not a
17800 common scenario. */
17801 start_sequence ();
17802 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17803 compare_seq = get_insns ();
17804 end_sequence ();
17806 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17807 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17808 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17809 else
17810 code = GET_CODE (compare_op);
17812 if (code != LTU && code != GEU)
17813 return false;
17815 emit_insn (compare_seq);
17816 *pop = compare_op;
17817 return true;
17820 if (!INTEGRAL_MODE_P (mode))
17821 return false;
17823 switch (code)
17825 case LTU:
17826 case GEU:
17827 break;
17829 /* Convert a==0 into (unsigned)a<1. */
17830 case EQ:
17831 case NE:
17832 if (op1 != const0_rtx)
17833 return false;
17834 op1 = const1_rtx;
17835 code = (code == EQ ? LTU : GEU);
17836 break;
17838 /* Convert a>b into b<a or a>=b-1. */
17839 case GTU:
17840 case LEU:
17841 if (CONST_INT_P (op1))
17843 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17844 /* Bail out on overflow. We could still swap the operands, but that
17845 would force loading the constant into a register. */
17846 if (op1 == const0_rtx
17847 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17848 return false;
17849 code = (code == GTU ? GEU : LTU);
17851 else
17853 rtx tmp = op1;
17854 op1 = op0;
17855 op0 = tmp;
17856 code = (code == GTU ? LTU : GEU);
17858 break;
17860 /* Convert a>=0 into (unsigned)a<0x80000000. */
17861 case LT:
17862 case GE:
17863 if (mode == DImode || op1 != const0_rtx)
17864 return false;
17865 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17866 code = (code == LT ? GEU : LTU);
17867 break;
17868 case LE:
17869 case GT:
17870 if (mode == DImode || op1 != constm1_rtx)
17871 return false;
17872 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17873 code = (code == LE ? GEU : LTU);
17874 break;
17876 default:
17877 return false;
17879 /* Swapping operands may cause a constant to appear as the first operand. */
17880 if (!nonimmediate_operand (op0, VOIDmode))
17882 if (!can_create_pseudo_p ())
17883 return false;
17884 op0 = force_reg (mode, op0);
17886 *pop = ix86_expand_compare (code, op0, op1);
17887 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
17888 return true;
17891 bool
17892 ix86_expand_int_movcc (rtx operands[])
17894 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17895 rtx compare_seq, compare_op;
17896 enum machine_mode mode = GET_MODE (operands[0]);
17897 bool sign_bit_compare_p = false;
17898 rtx op0 = XEXP (operands[1], 0);
17899 rtx op1 = XEXP (operands[1], 1);
17901 start_sequence ();
17902 compare_op = ix86_expand_compare (code, op0, op1);
17903 compare_seq = get_insns ();
17904 end_sequence ();
17906 compare_code = GET_CODE (compare_op);
17908 if ((op1 == const0_rtx && (code == GE || code == LT))
17909 || (op1 == constm1_rtx && (code == GT || code == LE)))
17910 sign_bit_compare_p = true;
17912 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17913 HImode insns, we'd be swallowed in word prefix ops. */
17915 if ((mode != HImode || TARGET_FAST_PREFIX)
17916 && (mode != (TARGET_64BIT ? TImode : DImode))
17917 && CONST_INT_P (operands[2])
17918 && CONST_INT_P (operands[3]))
17920 rtx out = operands[0];
17921 HOST_WIDE_INT ct = INTVAL (operands[2]);
17922 HOST_WIDE_INT cf = INTVAL (operands[3]);
17923 HOST_WIDE_INT diff;
17925 diff = ct - cf;
17926 /* Sign bit compares are better done using shifts than by using
17927 sbb. */
17928 if (sign_bit_compare_p
17929 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17931 /* Detect overlap between destination and compare sources. */
17932 rtx tmp = out;
17934 if (!sign_bit_compare_p)
17936 rtx flags;
17937 bool fpcmp = false;
17939 compare_code = GET_CODE (compare_op);
17941 flags = XEXP (compare_op, 0);
17943 if (GET_MODE (flags) == CCFPmode
17944 || GET_MODE (flags) == CCFPUmode)
17946 fpcmp = true;
17947 compare_code
17948 = ix86_fp_compare_code_to_integer (compare_code);
17951 /* To simplify the rest of the code, restrict to the GEU case. */
17952 if (compare_code == LTU)
17954 HOST_WIDE_INT tmp = ct;
17955 ct = cf;
17956 cf = tmp;
17957 compare_code = reverse_condition (compare_code);
17958 code = reverse_condition (code);
17960 else
17962 if (fpcmp)
17963 PUT_CODE (compare_op,
17964 reverse_condition_maybe_unordered
17965 (GET_CODE (compare_op)));
17966 else
17967 PUT_CODE (compare_op,
17968 reverse_condition (GET_CODE (compare_op)));
17970 diff = ct - cf;
17972 if (reg_overlap_mentioned_p (out, op0)
17973 || reg_overlap_mentioned_p (out, op1))
17974 tmp = gen_reg_rtx (mode);
17976 if (mode == DImode)
17977 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17978 else
17979 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17980 flags, compare_op));
17982 else
17984 if (code == GT || code == GE)
17985 code = reverse_condition (code);
17986 else
17988 HOST_WIDE_INT tmp = ct;
17989 ct = cf;
17990 cf = tmp;
17991 diff = ct - cf;
17993 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17996 if (diff == 1)
17999 * cmpl op0,op1
18000 * sbbl dest,dest
18001 * [addl dest, ct]
18003 * Size 5 - 8.
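/* sbb dest,dest computes dest - dest - CF, i.e. 0 or -1 depending on the
   carry flag, so when ct - cf == 1 both constants are reached with at
   most one additional add and no branch. */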
18005 if (ct)
18006 tmp = expand_simple_binop (mode, PLUS,
18007 tmp, GEN_INT (ct),
18008 copy_rtx (tmp), 1, OPTAB_DIRECT);
18010 else if (cf == -1)
18013 * cmpl op0,op1
18014 * sbbl dest,dest
18015 * orl $ct, dest
18017 * Size 8.
18019 tmp = expand_simple_binop (mode, IOR,
18020 tmp, GEN_INT (ct),
18021 copy_rtx (tmp), 1, OPTAB_DIRECT);
18023 else if (diff == -1 && ct)
18026 * cmpl op0,op1
18027 * sbbl dest,dest
18028 * notl dest
18029 * [addl dest, cf]
18031 * Size 8 - 11.
18033 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18034 if (cf)
18035 tmp = expand_simple_binop (mode, PLUS,
18036 copy_rtx (tmp), GEN_INT (cf),
18037 copy_rtx (tmp), 1, OPTAB_DIRECT);
18039 else
18042 * cmpl op0,op1
18043 * sbbl dest,dest
18044 * [notl dest]
18045 * andl cf - ct, dest
18046 * [addl dest, ct]
18048 * Size 8 - 11.
18051 if (cf == 0)
18053 cf = ct;
18054 ct = 0;
18055 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18058 tmp = expand_simple_binop (mode, AND,
18059 copy_rtx (tmp),
18060 gen_int_mode (cf - ct, mode),
18061 copy_rtx (tmp), 1, OPTAB_DIRECT);
18062 if (ct)
18063 tmp = expand_simple_binop (mode, PLUS,
18064 copy_rtx (tmp), GEN_INT (ct),
18065 copy_rtx (tmp), 1, OPTAB_DIRECT);
18068 if (!rtx_equal_p (tmp, out))
18069 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
18071 return true;
18074 if (diff < 0)
18076 enum machine_mode cmp_mode = GET_MODE (op0);
18078 HOST_WIDE_INT tmp;
18079 tmp = ct, ct = cf, cf = tmp;
18080 diff = -diff;
18082 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18084 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18086 /* We may be reversing an unordered compare to a normal compare, which
18087 is not valid in general (we may convert a non-trapping condition
18088 to a trapping one); however, on i386 we currently emit all
18089 comparisons unordered. */
18090 compare_code = reverse_condition_maybe_unordered (compare_code);
18091 code = reverse_condition_maybe_unordered (code);
18093 else
18095 compare_code = reverse_condition (compare_code);
18096 code = reverse_condition (code);
18100 compare_code = UNKNOWN;
18101 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18102 && CONST_INT_P (op1))
18104 if (op1 == const0_rtx
18105 && (code == LT || code == GE))
18106 compare_code = code;
18107 else if (op1 == constm1_rtx)
18109 if (code == LE)
18110 compare_code = LT;
18111 else if (code == GT)
18112 compare_code = GE;
18116 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18117 if (compare_code != UNKNOWN
18118 && GET_MODE (op0) == GET_MODE (out)
18119 && (cf == -1 || ct == -1))
18121 /* If lea code below could be used, only optimize
18122 if it results in a 2 insn sequence. */
18124 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18125 || diff == 3 || diff == 5 || diff == 9)
18126 || (compare_code == LT && ct == -1)
18127 || (compare_code == GE && cf == -1))
18130 * notl op1 (if necessary)
18131 * sarl $31, op1
18132 * orl cf, op1
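/* sarl $31 replicates the sign bit through the whole register, producing
   0 or -1; after the code below ensures ct == -1, or-ing in cf yields
   cf or -1, i.e. the two desired values. */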
18134 if (ct != -1)
18136 cf = ct;
18137 ct = -1;
18138 code = reverse_condition (code);
18141 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18143 out = expand_simple_binop (mode, IOR,
18144 out, GEN_INT (cf),
18145 out, 1, OPTAB_DIRECT);
18146 if (out != operands[0])
18147 emit_move_insn (operands[0], out);
18149 return true;
18154 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18155 || diff == 3 || diff == 5 || diff == 9)
18156 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18157 && (mode != DImode
18158 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18161 * xorl dest,dest
18162 * cmpl op1,op2
18163 * setcc dest
18164 * lea cf(dest*(ct-cf)),dest
18166 * Size 14.
18168 * This also catches the degenerate setcc-only case.
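/* The setcc result is 0 or 1, so the lea computes cf + dest * diff,
   which is cf or ct. diff values 1, 2, 4 and 8 map directly to an lea
   scale; 3, 5 and 9 use dest as both base and scaled index. */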
18171 rtx tmp;
18172 int nops;
18174 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18176 nops = 0;
18177 /* On x86_64 the lea instruction operates on Pmode, so we need
18178 the arithmetic done in the proper mode to match. */
18179 if (diff == 1)
18180 tmp = copy_rtx (out);
18181 else
18183 rtx out1;
18184 out1 = copy_rtx (out);
18185 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18186 nops++;
18187 if (diff & 1)
18189 tmp = gen_rtx_PLUS (mode, tmp, out1);
18190 nops++;
18193 if (cf != 0)
18195 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18196 nops++;
18198 if (!rtx_equal_p (tmp, out))
18200 if (nops == 1)
18201 out = force_operand (tmp, copy_rtx (out));
18202 else
18203 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18205 if (!rtx_equal_p (out, operands[0]))
18206 emit_move_insn (operands[0], copy_rtx (out));
18208 return true;
18212 * General case: Jumpful:
18213 * xorl dest,dest cmpl op1, op2
18214 * cmpl op1, op2 movl ct, dest
18215 * setcc dest jcc 1f
18216 * decl dest movl cf, dest
18217 * andl (cf-ct),dest 1:
18218 * addl ct,dest
18220 * Size 20. Size 14.
18222 * This is reasonably steep, but branch mispredict costs are
18223 * high on modern cpus, so consider failing only if optimizing
18224 * for space.
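/* The branchless sequence works because setcc leaves 0 or 1 in dest:
   the decrement gives -1 or 0, the and leaves cf - ct or 0, and the
   final add of ct produces cf or ct respectively. */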
18227 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18228 && BRANCH_COST (optimize_insn_for_speed_p (),
18229 false) >= 2)
18231 if (cf == 0)
18233 enum machine_mode cmp_mode = GET_MODE (op0);
18235 cf = ct;
18236 ct = 0;
18238 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18240 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18242 /* We may be reversing an unordered compare to a normal compare,
18243 which is not valid in general (we may convert a non-trapping
18244 condition to a trapping one); however, on i386 we currently
18245 emit all comparisons unordered. */
18246 code = reverse_condition_maybe_unordered (code);
18248 else
18250 code = reverse_condition (code);
18251 if (compare_code != UNKNOWN)
18252 compare_code = reverse_condition (compare_code);
18256 if (compare_code != UNKNOWN)
18258 /* notl op1 (if needed)
18259 sarl $31, op1
18260 andl (cf-ct), op1
18261 addl ct, op1
18263 For x < 0 (resp. x <= -1) there will be no notl,
18264 so if possible swap the constants to get rid of the
18265 complement.
18266 True/false will be -1/0 while code below (store flag
18267 followed by decrement) is 0/-1, so the constants need
18268 to be exchanged once more. */
18270 if (compare_code == GE || !cf)
18272 code = reverse_condition (code);
18273 compare_code = LT;
18275 else
18277 HOST_WIDE_INT tmp = cf;
18278 cf = ct;
18279 ct = tmp;
18282 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18284 else
18286 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18288 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18289 constm1_rtx,
18290 copy_rtx (out), 1, OPTAB_DIRECT);
18293 out = expand_simple_binop (mode, AND, copy_rtx (out),
18294 gen_int_mode (cf - ct, mode),
18295 copy_rtx (out), 1, OPTAB_DIRECT);
18296 if (ct)
18297 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18298 copy_rtx (out), 1, OPTAB_DIRECT);
18299 if (!rtx_equal_p (out, operands[0]))
18300 emit_move_insn (operands[0], copy_rtx (out));
18302 return true;
18306 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18308 /* Try a few more things with specific constants and a variable. */
18310 optab op;
18311 rtx var, orig_out, out, tmp;
18313 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18314 return false;
18316 /* If one of the two operands is an interesting constant, load a
18317 constant with the above and mask it in with a logical operation. */
18319 if (CONST_INT_P (operands[2]))
18321 var = operands[3];
18322 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18323 operands[3] = constm1_rtx, op = and_optab;
18324 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18325 operands[3] = const0_rtx, op = ior_optab;
18326 else
18327 return false;
18329 else if (CONST_INT_P (operands[3]))
18331 var = operands[2];
18332 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18333 operands[2] = constm1_rtx, op = and_optab;
18334 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
18335 operands[2] = const0_rtx, op = ior_optab;
18336 else
18337 return false;
18339 else
18340 return false;
18342 orig_out = operands[0];
18343 tmp = gen_reg_rtx (mode);
18344 operands[0] = tmp;
18346 /* Recurse to get the constant loaded. */
18347 if (ix86_expand_int_movcc (operands) == 0)
18348 return false;
18350 /* Mask in the interesting variable. */
18351 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18352 OPTAB_WIDEN);
18353 if (!rtx_equal_p (out, orig_out))
18354 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18356 return true;
18360 * For comparison with above,
18362 * movl cf,dest
18363 * movl ct,tmp
18364 * cmpl op1,op2
18365 * cmovcc tmp,dest
18367 * Size 15.
18370 if (! nonimmediate_operand (operands[2], mode))
18371 operands[2] = force_reg (mode, operands[2]);
18372 if (! nonimmediate_operand (operands[3], mode))
18373 operands[3] = force_reg (mode, operands[3]);
18375 if (! register_operand (operands[2], VOIDmode)
18376 && (mode == QImode
18377 || ! register_operand (operands[3], VOIDmode)))
18378 operands[2] = force_reg (mode, operands[2]);
18380 if (mode == QImode
18381 && ! register_operand (operands[3], VOIDmode))
18382 operands[3] = force_reg (mode, operands[3]);
18384 emit_insn (compare_seq);
18385 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18386 gen_rtx_IF_THEN_ELSE (mode,
18387 compare_op, operands[2],
18388 operands[3])));
18389 return true;
18392 /* Swap, force into registers, or otherwise massage the two operands
18393 to an sse comparison with a mask result. Thus we differ a bit from
18394 ix86_prepare_fp_compare_args which expects to produce a flags result.
18396 The DEST operand exists to help determine whether to commute commutative
18397 operators. The POP0/POP1 operands are updated in place. The new
18398 comparison code is returned, or UNKNOWN if not implementable. */
18400 static enum rtx_code
18401 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18402 rtx *pop0, rtx *pop1)
18404 rtx tmp;
18406 switch (code)
18408 case LTGT:
18409 case UNEQ:
18410 /* We have no LTGT as an operator. We could implement it with
18411 NE & ORDERED, but this requires an extra temporary. It's
18412 not clear that it's worth it. */
18413 return UNKNOWN;
18415 case LT:
18416 case LE:
18417 case UNGT:
18418 case UNGE:
18419 /* These are supported directly. */
18420 break;
18422 case EQ:
18423 case NE:
18424 case UNORDERED:
18425 case ORDERED:
18426 /* For commutative operators, try to canonicalize the destination
18427 operand to be first in the comparison - this helps reload to
18428 avoid extra moves. */
18429 if (!dest || !rtx_equal_p (dest, *pop1))
18430 break;
18431 /* FALLTHRU */
18433 case GE:
18434 case GT:
18435 case UNLE:
18436 case UNLT:
18437 /* These are not supported directly. Swap the comparison operands
18438 to transform into something that is supported. */
18439 tmp = *pop0;
18440 *pop0 = *pop1;
18441 *pop1 = tmp;
18442 code = swap_condition (code);
18443 break;
18445 default:
18446 gcc_unreachable ();
18449 return code;
18452 /* Detect conditional moves that exactly match min/max operational
18453 semantics. Note that this is IEEE safe, as long as we don't
18454 interchange the operands.
18456 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18457 and TRUE if the operation is successful and instructions are emitted. */
18459 static bool
18460 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18461 rtx cmp_op1, rtx if_true, rtx if_false)
18463 enum machine_mode mode;
18464 bool is_min;
18465 rtx tmp;
18467 if (code == LT)
18469 else if (code == UNGE)
18471 tmp = if_true;
18472 if_true = if_false;
18473 if_false = tmp;
18475 else
18476 return false;
18478 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18479 is_min = true;
18480 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18481 is_min = false;
18482 else
18483 return false;
18485 mode = GET_MODE (dest);
18487 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18488 but MODE may be a vector mode and thus not appropriate. */
18489 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18491 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18492 rtvec v;
18494 if_true = force_reg (mode, if_true);
18495 v = gen_rtvec (2, if_true, if_false);
18496 tmp = gen_rtx_UNSPEC (mode, v, u);
18498 else
18500 code = is_min ? SMIN : SMAX;
18501 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18504 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18505 return true;
18508 /* Expand an sse vector comparison. Return the register with the result. */
18510 static rtx
18511 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18512 rtx op_true, rtx op_false)
18514 enum machine_mode mode = GET_MODE (dest);
18515 rtx x;
18517 cmp_op0 = force_reg (mode, cmp_op0);
18518 if (!nonimmediate_operand (cmp_op1, mode))
18519 cmp_op1 = force_reg (mode, cmp_op1);
18521 if (optimize
18522 || reg_overlap_mentioned_p (dest, op_true)
18523 || reg_overlap_mentioned_p (dest, op_false))
18524 dest = gen_reg_rtx (mode);
18526 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18527 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18529 return dest;
18532 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18533 operations. This is used for both scalar and vector conditional moves. */
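/* The comparison result is an all-ones or all-zeros mask per element, so
   the select can be computed as (cmp & op_true) | (~cmp & op_false).
   The special cases below avoid redundant operations when one arm is
   zero, and the TARGET_XOP path emits a single conditional-move pattern. */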
18535 static void
18536 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18538 enum machine_mode mode = GET_MODE (dest);
18539 rtx t2, t3, x;
18541 if (op_false == CONST0_RTX (mode))
18543 op_true = force_reg (mode, op_true);
18544 x = gen_rtx_AND (mode, cmp, op_true);
18545 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18547 else if (op_true == CONST0_RTX (mode))
18549 op_false = force_reg (mode, op_false);
18550 x = gen_rtx_NOT (mode, cmp);
18551 x = gen_rtx_AND (mode, x, op_false);
18552 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18554 else if (TARGET_XOP)
18556 rtx pcmov = gen_rtx_SET (mode, dest,
18557 gen_rtx_IF_THEN_ELSE (mode, cmp,
18558 op_true,
18559 op_false));
18560 emit_insn (pcmov);
18562 else
18564 op_true = force_reg (mode, op_true);
18565 op_false = force_reg (mode, op_false);
18567 t2 = gen_reg_rtx (mode);
18568 if (optimize)
18569 t3 = gen_reg_rtx (mode);
18570 else
18571 t3 = dest;
18573 x = gen_rtx_AND (mode, op_true, cmp);
18574 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18576 x = gen_rtx_NOT (mode, cmp);
18577 x = gen_rtx_AND (mode, x, op_false);
18578 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18580 x = gen_rtx_IOR (mode, t3, t2);
18581 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18585 /* Expand a floating-point conditional move. Return true if successful. */
18587 bool
18588 ix86_expand_fp_movcc (rtx operands[])
18590 enum machine_mode mode = GET_MODE (operands[0]);
18591 enum rtx_code code = GET_CODE (operands[1]);
18592 rtx tmp, compare_op;
18593 rtx op0 = XEXP (operands[1], 0);
18594 rtx op1 = XEXP (operands[1], 1);
18596 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18598 enum machine_mode cmode;
18600 /* Since we've no cmove for sse registers, don't force bad register
18601 allocation just to gain access to it. Deny movcc when the
18602 comparison mode doesn't match the move mode. */
18603 cmode = GET_MODE (op0);
18604 if (cmode == VOIDmode)
18605 cmode = GET_MODE (op1);
18606 if (cmode != mode)
18607 return false;
18609 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18610 if (code == UNKNOWN)
18611 return false;
18613 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18614 operands[2], operands[3]))
18615 return true;
18617 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18618 operands[2], operands[3]);
18619 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18620 return true;
18623 /* The floating point conditional move instructions don't directly
18624 support conditions resulting from a signed integer comparison. */
18626 compare_op = ix86_expand_compare (code, op0, op1);
18627 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18629 tmp = gen_reg_rtx (QImode);
18630 ix86_expand_setcc (tmp, code, op0, op1);
18632 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18635 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18636 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18637 operands[2], operands[3])));
18639 return true;
18642 /* Expand a floating-point vector conditional move; a vcond operation
18643 rather than a movcc operation. */
18645 bool
18646 ix86_expand_fp_vcond (rtx operands[])
18648 enum rtx_code code = GET_CODE (operands[3]);
18649 rtx cmp;
18651 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18652 &operands[4], &operands[5]);
18653 if (code == UNKNOWN)
18654 return false;
18656 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18657 operands[5], operands[1], operands[2]))
18658 return true;
18660 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18661 operands[1], operands[2]);
18662 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18663 return true;
18666 /* Expand a signed/unsigned integral vector conditional move. */
18668 bool
18669 ix86_expand_int_vcond (rtx operands[])
18671 enum machine_mode mode = GET_MODE (operands[0]);
18672 enum rtx_code code = GET_CODE (operands[3]);
18673 bool negate = false;
18674 rtx x, cop0, cop1;
18676 cop0 = operands[4];
18677 cop1 = operands[5];
18679 /* XOP supports all of the comparisons on all vector int types. */
18680 if (!TARGET_XOP)
18682 /* Canonicalize the comparison to EQ, GT, GTU. */
18683 switch (code)
18685 case EQ:
18686 case GT:
18687 case GTU:
18688 break;
18690 case NE:
18691 case LE:
18692 case LEU:
18693 code = reverse_condition (code);
18694 negate = true;
18695 break;
18697 case GE:
18698 case GEU:
18699 code = reverse_condition (code);
18700 negate = true;
18701 /* FALLTHRU */
18703 case LT:
18704 case LTU:
18705 code = swap_condition (code);
18706 x = cop0, cop0 = cop1, cop1 = x;
18707 break;
18709 default:
18710 gcc_unreachable ();
18713 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18714 if (mode == V2DImode)
18716 switch (code)
18718 case EQ:
18719 /* SSE4.1 supports EQ. */
18720 if (!TARGET_SSE4_1)
18721 return false;
18722 break;
18724 case GT:
18725 case GTU:
18726 /* SSE4.2 supports GT/GTU. */
18727 if (!TARGET_SSE4_2)
18728 return false;
18729 break;
18731 default:
18732 gcc_unreachable ();
18736 /* Unsigned parallel compare is not supported by the hardware.
18737 Play some tricks to turn this into a signed comparison
18738 against 0. */
18739 if (code == GTU)
18741 cop0 = force_reg (mode, cop0);
18743 switch (mode)
18745 case V4SImode:
18746 case V2DImode:
18748 rtx t1, t2, mask;
18749 rtx (*gen_sub3) (rtx, rtx, rtx);
18751 /* Subtract (-(INT MAX) - 1) from both operands to make
18752 them signed. */
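/* Subtracting the per-element sign-bit value flips the sign bit of each
   element, and x >u y holds exactly when
   (x - sign_bit) >s (y - sign_bit), so the unsigned compare becomes a
   signed GT. */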
18753 mask = ix86_build_signbit_mask (mode, true, false);
18754 gen_sub3 = (mode == V4SImode
18755 ? gen_subv4si3 : gen_subv2di3);
18756 t1 = gen_reg_rtx (mode);
18757 emit_insn (gen_sub3 (t1, cop0, mask));
18759 t2 = gen_reg_rtx (mode);
18760 emit_insn (gen_sub3 (t2, cop1, mask));
18762 cop0 = t1;
18763 cop1 = t2;
18764 code = GT;
18766 break;
18768 case V16QImode:
18769 case V8HImode:
18770 /* Perform a parallel unsigned saturating subtraction. */
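/* For unsigned elements, x >u y exactly when the saturating difference
   x -us y is nonzero, so we test for equality with zero and flip
   NEGATE to select the complementary operands. */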
18771 x = gen_reg_rtx (mode);
18772 emit_insn (gen_rtx_SET (VOIDmode, x,
18773 gen_rtx_US_MINUS (mode, cop0, cop1)));
18775 cop0 = x;
18776 cop1 = CONST0_RTX (mode);
18777 code = EQ;
18778 negate = !negate;
18779 break;
18781 default:
18782 gcc_unreachable ();
18787 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18788 operands[1+negate], operands[2-negate]);
18790 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18791 operands[2-negate]);
18792 return true;
18795 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18796 true if we should do zero extension, else sign extension. HIGH_P is
18797 true if we want the N/2 high elements, else the low elements. */
18799 void
18800 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18802 enum machine_mode imode = GET_MODE (operands[1]);
18803 rtx (*unpack)(rtx, rtx, rtx);
18804 rtx se, dest;
18806 switch (imode)
18808 case V16QImode:
18809 if (high_p)
18810 unpack = gen_vec_interleave_highv16qi;
18811 else
18812 unpack = gen_vec_interleave_lowv16qi;
18813 break;
18814 case V8HImode:
18815 if (high_p)
18816 unpack = gen_vec_interleave_highv8hi;
18817 else
18818 unpack = gen_vec_interleave_lowv8hi;
18819 break;
18820 case V4SImode:
18821 if (high_p)
18822 unpack = gen_vec_interleave_highv4si;
18823 else
18824 unpack = gen_vec_interleave_lowv4si;
18825 break;
18826 default:
18827 gcc_unreachable ();
18830 dest = gen_lowpart (imode, operands[0]);
18832 if (unsigned_p)
18833 se = force_reg (imode, CONST0_RTX (imode));
18834 else
18835 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18836 operands[1], pc_rtx, pc_rtx);
18838 emit_insn (unpack (dest, operands[1], se));
18841 /* This function performs the same task as ix86_expand_sse_unpack,
18842 but with SSE4.1 instructions. */
18844 void
18845 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18847 enum machine_mode imode = GET_MODE (operands[1]);
18848 rtx (*unpack)(rtx, rtx);
18849 rtx src, dest;
18851 switch (imode)
18853 case V16QImode:
18854 if (unsigned_p)
18855 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18856 else
18857 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18858 break;
18859 case V8HImode:
18860 if (unsigned_p)
18861 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18862 else
18863 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18864 break;
18865 case V4SImode:
18866 if (unsigned_p)
18867 unpack = gen_sse4_1_zero_extendv2siv2di2;
18868 else
18869 unpack = gen_sse4_1_sign_extendv2siv2di2;
18870 break;
18871 default:
18872 gcc_unreachable ();
18875 dest = operands[0];
18876 if (high_p)
18878 /* Shift higher 8 bytes to lower 8 bytes. */
18879 src = gen_reg_rtx (imode);
18880 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
18881 gen_lowpart (V1TImode, operands[1]),
18882 GEN_INT (64)));
18884 else
18885 src = operands[1];
18887 emit_insn (unpack (dest, src));
18890 /* Expand conditional increment or decrement using adc/sbb instructions.
18891 The default case, using setcc followed by a conditional move, can be
18892 done by generic code. */
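/* For example, x += (a < b) with unsigned operands becomes a compare of
   a against b followed by an add-with-carry of zero, so the carry flag
   supplies the conditional 0/1 without a branch. */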
18893 bool
18894 ix86_expand_int_addcc (rtx operands[])
18896 enum rtx_code code = GET_CODE (operands[1]);
18897 rtx flags;
18898 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18899 rtx compare_op;
18900 rtx val = const0_rtx;
18901 bool fpcmp = false;
18902 enum machine_mode mode;
18903 rtx op0 = XEXP (operands[1], 0);
18904 rtx op1 = XEXP (operands[1], 1);
18906 if (operands[3] != const1_rtx
18907 && operands[3] != constm1_rtx)
18908 return false;
18909 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18910 return false;
18911 code = GET_CODE (compare_op);
18913 flags = XEXP (compare_op, 0);
18915 if (GET_MODE (flags) == CCFPmode
18916 || GET_MODE (flags) == CCFPUmode)
18918 fpcmp = true;
18919 code = ix86_fp_compare_code_to_integer (code);
18922 if (code != LTU)
18924 val = constm1_rtx;
18925 if (fpcmp)
18926 PUT_CODE (compare_op,
18927 reverse_condition_maybe_unordered
18928 (GET_CODE (compare_op)));
18929 else
18930 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18933 mode = GET_MODE (operands[0]);
18935 /* Construct either adc or sbb insn. */
18936 if ((code == LTU) == (operands[3] == constm1_rtx))
18938 switch (mode)
18940 case QImode:
18941 insn = gen_subqi3_carry;
18942 break;
18943 case HImode:
18944 insn = gen_subhi3_carry;
18945 break;
18946 case SImode:
18947 insn = gen_subsi3_carry;
18948 break;
18949 case DImode:
18950 insn = gen_subdi3_carry;
18951 break;
18952 default:
18953 gcc_unreachable ();
18956 else
18958 switch (mode)
18960 case QImode:
18961 insn = gen_addqi3_carry;
18962 break;
18963 case HImode:
18964 insn = gen_addhi3_carry;
18965 break;
18966 case SImode:
18967 insn = gen_addsi3_carry;
18968 break;
18969 case DImode:
18970 insn = gen_adddi3_carry;
18971 break;
18972 default:
18973 gcc_unreachable ();
18976 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
18978 return true;
18982 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
18983 but works for floating point parameters and non-offsettable memories.
18984 For pushes, it returns just stack offsets; the values will be saved
18985 in the right order. At most four parts are generated. */
18987 static int
18988 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18990 int size;
18992 if (!TARGET_64BIT)
18993 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18994 else
18995 size = (GET_MODE_SIZE (mode) + 4) / 8;
18997 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18998 gcc_assert (size >= 2 && size <= 4);
19000 /* Optimize constant pool references to immediates. This is used by fp
19001 moves, which force all constants to memory to allow combining. */
19002 if (MEM_P (operand) && MEM_READONLY_P (operand))
19004 rtx tmp = maybe_get_pool_constant (operand);
19005 if (tmp)
19006 operand = tmp;
19009 if (MEM_P (operand) && !offsettable_memref_p (operand))
19011 /* The only non-offsettable memories we handle are pushes. */
19012 int ok = push_operand (operand, VOIDmode);
19014 gcc_assert (ok);
19016 operand = copy_rtx (operand);
19017 PUT_MODE (operand, Pmode);
19018 parts[0] = parts[1] = parts[2] = parts[3] = operand;
19019 return size;
19022 if (GET_CODE (operand) == CONST_VECTOR)
19024 enum machine_mode imode = int_mode_for_mode (mode);
19025 /* Caution: if we looked through a constant pool memory above,
19026 the operand may actually have a different mode now. That's
19027 ok, since we want to pun this all the way back to an integer. */
19028 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
19029 gcc_assert (operand != NULL);
19030 mode = imode;
19033 if (!TARGET_64BIT)
19035 if (mode == DImode)
19036 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19037 else
19039 int i;
19041 if (REG_P (operand))
19043 gcc_assert (reload_completed);
19044 for (i = 0; i < size; i++)
19045 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
19047 else if (offsettable_memref_p (operand))
19049 operand = adjust_address (operand, SImode, 0);
19050 parts[0] = operand;
19051 for (i = 1; i < size; i++)
19052 parts[i] = adjust_address (operand, SImode, 4 * i);
19054 else if (GET_CODE (operand) == CONST_DOUBLE)
19056 REAL_VALUE_TYPE r;
19057 long l[4];
19059 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19060 switch (mode)
19062 case TFmode:
19063 real_to_target (l, &r, mode);
19064 parts[3] = gen_int_mode (l[3], SImode);
19065 parts[2] = gen_int_mode (l[2], SImode);
19066 break;
19067 case XFmode:
19068 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
19069 parts[2] = gen_int_mode (l[2], SImode);
19070 break;
19071 case DFmode:
19072 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
19073 break;
19074 default:
19075 gcc_unreachable ();
19077 parts[1] = gen_int_mode (l[1], SImode);
19078 parts[0] = gen_int_mode (l[0], SImode);
19080 else
19081 gcc_unreachable ();
19084 else
19086 if (mode == TImode)
19087 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19088 if (mode == XFmode || mode == TFmode)
19090 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
19091 if (REG_P (operand))
19093 gcc_assert (reload_completed);
19094 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19095 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19097 else if (offsettable_memref_p (operand))
19099 operand = adjust_address (operand, DImode, 0);
19100 parts[0] = operand;
19101 parts[1] = adjust_address (operand, upper_mode, 8);
19103 else if (GET_CODE (operand) == CONST_DOUBLE)
19105 REAL_VALUE_TYPE r;
19106 long l[4];
19108 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19109 real_to_target (l, &r, mode);
19111 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19112 if (HOST_BITS_PER_WIDE_INT >= 64)
19113 parts[0]
19114 = gen_int_mode
19115 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19116 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19117 DImode);
19118 else
19119 parts[0] = immed_double_const (l[0], l[1], DImode);
19121 if (upper_mode == SImode)
19122 parts[1] = gen_int_mode (l[2], SImode);
19123 else if (HOST_BITS_PER_WIDE_INT >= 64)
19124 parts[1]
19125 = gen_int_mode
19126 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19127 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19128 DImode);
19129 else
19130 parts[1] = immed_double_const (l[2], l[3], DImode);
19132 else
19133 gcc_unreachable ();
19137 return size;
19140 /* Emit insns to perform a move or push of DI, DF, XF, and TF values;
19141 all required insns are emitted directly. Operands 2-5 are used as
19142 temporaries for the destination parts and operands 6-9 for the
19143 source parts, in the correct order. */
19145 void
19146 ix86_split_long_move (rtx operands[])
19148 rtx part[2][4];
19149 int nparts, i, j;
19150 int push = 0;
19151 int collisions = 0;
19152 enum machine_mode mode = GET_MODE (operands[0]);
19153 bool collisionparts[4];
19155 /* The DFmode expanders may ask us to move a double.
19156 For a 64-bit target this is a single move. By hiding that fact
19157 here we simplify the i386.md splitters. */
19158 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19160 /* Optimize constant pool references to immediates. This is used by
19161 fp moves, which force all constants to memory to allow combining. */
19163 if (MEM_P (operands[1])
19164 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19165 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19166 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19167 if (push_operand (operands[0], VOIDmode))
19169 operands[0] = copy_rtx (operands[0]);
19170 PUT_MODE (operands[0], Pmode);
19172 else
19173 operands[0] = gen_lowpart (DImode, operands[0]);
19174 operands[1] = gen_lowpart (DImode, operands[1]);
19175 emit_move_insn (operands[0], operands[1]);
19176 return;
19179 /* The only non-offsettable memory we handle is push. */
19180 if (push_operand (operands[0], VOIDmode))
19181 push = 1;
19182 else
19183 gcc_assert (!MEM_P (operands[0])
19184 || offsettable_memref_p (operands[0]));
19186 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19187 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19189 /* When emitting a push, take care with source operands on the stack. */
19190 if (push && MEM_P (operands[1])
19191 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19193 rtx src_base = XEXP (part[1][nparts - 1], 0);
19195 /* Compensate for the stack decrement by 4. */
19196 if (!TARGET_64BIT && nparts == 3
19197 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19198 src_base = plus_constant (src_base, 4);
19200 /* src_base refers to the stack pointer and is
19201 automatically decreased by emitted push. */
19202 for (i = 0; i < nparts; i++)
19203 part[1][i] = change_address (part[1][i],
19204 GET_MODE (part[1][i]), src_base);
19207 /* We need to do the copy in the right order in case an address register
19208 of the source overlaps the destination. */
19209 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19211 rtx tmp;
19213 for (i = 0; i < nparts; i++)
19215 collisionparts[i]
19216 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19217 if (collisionparts[i])
19218 collisions++;
19221 /* Collision in the middle part can be handled by reordering. */
19222 if (collisions == 1 && nparts == 3 && collisionparts [1])
19224 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19225 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19227 else if (collisions == 1
19228 && nparts == 4
19229 && (collisionparts [1] || collisionparts [2]))
19231 if (collisionparts [1])
19233 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19234 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19236 else
19238 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19239 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19243 /* If there are more collisions, we can't handle it by reordering.
19244 Do an lea to the last part and use only one colliding move. */
19245 else if (collisions > 1)
19247 rtx base;
19249 collisions = 1;
19251 base = part[0][nparts - 1];
19253 /* Handle the case when the last part isn't valid for lea.
19254 Happens in 64-bit mode storing the 12-byte XFmode. */
19255 if (GET_MODE (base) != Pmode)
19256 base = gen_rtx_REG (Pmode, REGNO (base));
19258 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19259 part[1][0] = replace_equiv_address (part[1][0], base);
19260 for (i = 1; i < nparts; i++)
19262 tmp = plus_constant (base, UNITS_PER_WORD * i);
19263 part[1][i] = replace_equiv_address (part[1][i], tmp);
19268 if (push)
19270 if (!TARGET_64BIT)
19272 if (nparts == 3)
19274 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19275 emit_insn (gen_addsi3 (stack_pointer_rtx,
19276 stack_pointer_rtx, GEN_INT (-4)));
19277 emit_move_insn (part[0][2], part[1][2]);
19279 else if (nparts == 4)
19281 emit_move_insn (part[0][3], part[1][3]);
19282 emit_move_insn (part[0][2], part[1][2]);
19285 else
19287 /* In 64-bit mode we don't have a 32-bit push available. If this is a
19288 register, that is OK - we just use the larger counterpart. We also
19289 retype memory - this comes from an attempt to avoid a REX prefix when
19290 moving the second half of a TFmode value. */
19291 if (GET_MODE (part[1][1]) == SImode)
19293 switch (GET_CODE (part[1][1]))
19295 case MEM:
19296 part[1][1] = adjust_address (part[1][1], DImode, 0);
19297 break;
19299 case REG:
19300 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19301 break;
19303 default:
19304 gcc_unreachable ();
19307 if (GET_MODE (part[1][0]) == SImode)
19308 part[1][0] = part[1][1];
19311 emit_move_insn (part[0][1], part[1][1]);
19312 emit_move_insn (part[0][0], part[1][0]);
19313 return;
19316 /* Choose the correct order so as not to overwrite the source before it is copied. */
19317 if ((REG_P (part[0][0])
19318 && REG_P (part[1][1])
19319 && (REGNO (part[0][0]) == REGNO (part[1][1])
19320 || (nparts == 3
19321 && REGNO (part[0][0]) == REGNO (part[1][2]))
19322 || (nparts == 4
19323 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19324 || (collisions > 0
19325 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19327 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19329 operands[2 + i] = part[0][j];
19330 operands[6 + i] = part[1][j];
19333 else
19335 for (i = 0; i < nparts; i++)
19337 operands[2 + i] = part[0][i];
19338 operands[6 + i] = part[1][i];
19342 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19343 if (optimize_insn_for_size_p ())
19345 for (j = 0; j < nparts - 1; j++)
19346 if (CONST_INT_P (operands[6 + j])
19347 && operands[6 + j] != const0_rtx
19348 && REG_P (operands[2 + j]))
19349 for (i = j; i < nparts - 1; i++)
19350 if (CONST_INT_P (operands[7 + i])
19351 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19352 operands[7 + i] = operands[2 + j];
19355 for (i = 0; i < nparts; i++)
19356 emit_move_insn (operands[2 + i], operands[6 + i]);
19358 return;
19361 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19362 left shift by a constant, either using a single shift or
19363 a sequence of add instructions. */
19365 static void
19366 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19368 rtx (*insn)(rtx, rtx, rtx);
19370 if (count == 1
19371 || (count * ix86_cost->add <= ix86_cost->shift_const
19372 && !optimize_insn_for_size_p ()))
19374 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19375 while (count-- > 0)
19376 emit_insn (insn (operand, operand, operand));
19378 else
19380 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19381 emit_insn (insn (operand, operand, GEN_INT (count)));
19385 void
19386 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19388 rtx (*gen_ashl3)(rtx, rtx, rtx);
19389 rtx (*gen_shld)(rtx, rtx, rtx);
19390 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19392 rtx low[2], high[2];
19393 int count;
19395 if (CONST_INT_P (operands[2]))
19397 split_double_mode (mode, operands, 2, low, high);
19398 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19400 if (count >= half_width)
19402 emit_move_insn (high[0], low[1]);
19403 emit_move_insn (low[0], const0_rtx);
19405 if (count > half_width)
19406 ix86_expand_ashl_const (high[0], count - half_width, mode);
19408 else
19410 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19412 if (!rtx_equal_p (operands[0], operands[1]))
19413 emit_move_insn (operands[0], operands[1]);
19415 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19416 ix86_expand_ashl_const (low[0], count, mode);
19418 return;
19421 split_double_mode (mode, operands, 1, low, high);
19423 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19425 if (operands[1] == const1_rtx)
19427 /* Assuming we've chosen QImode-capable registers, 1 << N
19428 can be done with two 32/64-bit shifts, no branches, no cmoves. */
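/* The single 1 bit is first placed in the correct half based on bit 5/6
   of the count, then both halves are shifted by the full count, which
   the hardware masks to the half width. E.g. for DImode 1 << 40:
   low = 0, high = 1, and shifting by 40 & 31 == 8 gives high = 0x100,
   i.e. bit 40 of the 64-bit result. */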
19429 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19431 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19433 ix86_expand_clear (low[0]);
19434 ix86_expand_clear (high[0]);
19435 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19437 d = gen_lowpart (QImode, low[0]);
19438 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19439 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19440 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19442 d = gen_lowpart (QImode, high[0]);
19443 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19444 s = gen_rtx_NE (QImode, flags, const0_rtx);
19445 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19448 /* Otherwise, we can get the same results by manually performing
19449 a bit extract operation on bit 5/6, and then performing the two
19450 shifts. The two methods of getting 0/1 into low/high are exactly
19451 the same size. Avoiding the shift in the bit extract case helps
19452 pentium4 a bit; no one else seems to care much either way. */
19453 else
19455 enum machine_mode half_mode;
19456 rtx (*gen_lshr3)(rtx, rtx, rtx);
19457 rtx (*gen_and3)(rtx, rtx, rtx);
19458 rtx (*gen_xor3)(rtx, rtx, rtx);
19459 HOST_WIDE_INT bits;
19460 rtx x;
19462 if (mode == DImode)
19464 half_mode = SImode;
19465 gen_lshr3 = gen_lshrsi3;
19466 gen_and3 = gen_andsi3;
19467 gen_xor3 = gen_xorsi3;
19468 bits = 5;
19470 else
19472 half_mode = DImode;
19473 gen_lshr3 = gen_lshrdi3;
19474 gen_and3 = gen_anddi3;
19475 gen_xor3 = gen_xordi3;
19476 bits = 6;
19479 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19480 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19481 else
19482 x = gen_lowpart (half_mode, operands[2]);
19483 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19485 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19486 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19487 emit_move_insn (low[0], high[0]);
19488 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19491 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19492 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
19493 return;
19496 if (operands[1] == constm1_rtx)
19498 /* For -1 << N, we can avoid the shld instruction, because we
19499 know that we're shifting 0...31/63 ones into a -1. */
19500 emit_move_insn (low[0], constm1_rtx);
19501 if (optimize_insn_for_size_p ())
19502 emit_move_insn (high[0], low[0]);
19503 else
19504 emit_move_insn (high[0], constm1_rtx);
19506 else
19508 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19510 if (!rtx_equal_p (operands[0], operands[1]))
19511 emit_move_insn (operands[0], operands[1]);
19513 split_double_mode (mode, operands, 1, low, high);
19514 emit_insn (gen_shld (high[0], low[0], operands[2]));
19517 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19519 if (TARGET_CMOVE && scratch)
19521 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19522 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19524 ix86_expand_clear (scratch);
19525 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19527 else
19529 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19530 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19532 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19536 void
19537 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19539 rtx (*gen_ashr3)(rtx, rtx, rtx)
19540 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19541 rtx (*gen_shrd)(rtx, rtx, rtx);
19542 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19544 rtx low[2], high[2];
19545 int count;
19547 if (CONST_INT_P (operands[2]))
19549 split_double_mode (mode, operands, 2, low, high);
19550 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19552 if (count == GET_MODE_BITSIZE (mode) - 1)
19554 emit_move_insn (high[0], high[1]);
19555 emit_insn (gen_ashr3 (high[0], high[0],
19556 GEN_INT (half_width - 1)));
19557 emit_move_insn (low[0], high[0]);
19560 else if (count >= half_width)
19562 emit_move_insn (low[0], high[1]);
19563 emit_move_insn (high[0], low[0]);
19564 emit_insn (gen_ashr3 (high[0], high[0],
19565 GEN_INT (half_width - 1)));
19567 if (count > half_width)
19568 emit_insn (gen_ashr3 (low[0], low[0],
19569 GEN_INT (count - half_width)));
19571 else
19573 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19575 if (!rtx_equal_p (operands[0], operands[1]))
19576 emit_move_insn (operands[0], operands[1]);
19578 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19579 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19582 else
19584 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19586 if (!rtx_equal_p (operands[0], operands[1]))
19587 emit_move_insn (operands[0], operands[1]);
19589 split_double_mode (mode, operands, 1, low, high);
19591 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19592 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19594 if (TARGET_CMOVE && scratch)
19596 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19597 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19599 emit_move_insn (scratch, high[0]);
19600 emit_insn (gen_ashr3 (scratch, scratch,
19601 GEN_INT (half_width - 1)));
19602 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19603 scratch));
19605 else
19607 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19608 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19610 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19615 void
19616 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19618 rtx (*gen_lshr3)(rtx, rtx, rtx)
19619 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19620 rtx (*gen_shrd)(rtx, rtx, rtx);
19621 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19623 rtx low[2], high[2];
19624 int count;
19626 if (CONST_INT_P (operands[2]))
19628 split_double_mode (mode, operands, 2, low, high);
19629 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19631 if (count >= half_width)
19633 emit_move_insn (low[0], high[1]);
19634 ix86_expand_clear (high[0]);
19636 if (count > half_width)
19637 emit_insn (gen_lshr3 (low[0], low[0],
19638 GEN_INT (count - half_width)));
19640 else
19642 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19644 if (!rtx_equal_p (operands[0], operands[1]))
19645 emit_move_insn (operands[0], operands[1]);
19647 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19648 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19651 else
19653 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19655 if (!rtx_equal_p (operands[0], operands[1]))
19656 emit_move_insn (operands[0], operands[1]);
19658 split_double_mode (mode, operands, 1, low, high);
19660 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19661 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19663 if (TARGET_CMOVE && scratch)
19665 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19666 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19668 ix86_expand_clear (scratch);
19669 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19670 scratch));
19672 else
19674 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19675 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19677 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19682 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
19683 static void
19684 predict_jump (int prob)
19686 rtx insn = get_last_insn ();
19687 gcc_assert (JUMP_P (insn));
19688 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19691 /* Helper function for the string operations below. Test whether VARIABLE
19692 is aligned to VALUE bytes. If it is, jump to the returned label. */
19693 static rtx
19694 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19696 rtx label = gen_label_rtx ();
19697 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19698 if (GET_MODE (variable) == DImode)
19699 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19700 else
19701 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19702 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19703 1, label);
19704 if (epilogue)
19705 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19706 else
19707 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19708 return label;
19711 /* Decrease COUNTREG by VALUE. */
19712 static void
19713 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19715 rtx (*gen_add)(rtx, rtx, rtx)
19716 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19718 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
19721 /* Zero-extend EXP, which may be in SImode, into a Pmode register. */
19722 rtx
19723 ix86_zero_extend_to_Pmode (rtx exp)
19725 rtx r;
19726 if (GET_MODE (exp) == VOIDmode)
19727 return force_reg (Pmode, exp);
19728 if (GET_MODE (exp) == Pmode)
19729 return copy_to_mode_reg (Pmode, exp);
19730 r = gen_reg_rtx (Pmode);
19731 emit_insn (gen_zero_extendsidi2 (r, exp));
19732 return r;
19735 /* Divide COUNTREG by SCALE. */
19736 static rtx
19737 scale_counter (rtx countreg, int scale)
19739 rtx sc;
19741 if (scale == 1)
19742 return countreg;
19743 if (CONST_INT_P (countreg))
19744 return GEN_INT (INTVAL (countreg) / scale);
19745 gcc_assert (REG_P (countreg));
19747 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19748 GEN_INT (exact_log2 (scale)),
19749 NULL, 1, OPTAB_DIRECT);
19750 return sc;
19753 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19754 DImode for constant loop counts. */
19756 static enum machine_mode
19757 counter_mode (rtx count_exp)
19759 if (GET_MODE (count_exp) != VOIDmode)
19760 return GET_MODE (count_exp);
19761 if (!CONST_INT_P (count_exp))
19762 return Pmode;
19763 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
19764 return DImode;
19765 return SImode;
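/* Illustrative examples (added commentary): a constant count of 0x1000
   yields SImode, while a constant such as 0x100000000 on a 64-bit target
   yields DImode; non-constant counts keep their own mode, or Pmode when
   the mode is unknown.  */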
19768 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
19769 to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
19770 overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
19771 the equivalent loop to set memory to VALUE (assumed to be in MODE).
19773 The size is rounded down to a whole number of chunks moved at once.
19774 SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info. */
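/* Rough C equivalent of the emitted loop (added commentary, ignoring
   modes, aliasing and the branch-probability notes):

       size = count & ~(piece - 1);   // piece = GET_MODE_SIZE (mode) * unroll
       for (iter = 0; iter < size; iter += piece)
         copy (or set) piece bytes at destptr + iter;   // body is unrolled
       destptr += size;  if (srcptr) srcptr += size;                      */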
19777 static void
19778 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19779 rtx destptr, rtx srcptr, rtx value,
19780 rtx count, enum machine_mode mode, int unroll,
19781 int expected_size)
19783 rtx out_label, top_label, iter, tmp;
19784 enum machine_mode iter_mode = counter_mode (count);
19785 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19786 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19787 rtx size;
19788 rtx x_addr;
19789 rtx y_addr;
19790 int i;
19792 top_label = gen_label_rtx ();
19793 out_label = gen_label_rtx ();
19794 iter = gen_reg_rtx (iter_mode);
19796 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19797 NULL, 1, OPTAB_DIRECT);
19798 /* Those two should combine. */
19799 if (piece_size == const1_rtx)
19801 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19802 true, out_label);
19803 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19805 emit_move_insn (iter, const0_rtx);
19807 emit_label (top_label);
19809 tmp = convert_modes (Pmode, iter_mode, iter, true);
19810 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19811 destmem = change_address (destmem, mode, x_addr);
19813 if (srcmem)
19815 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19816 srcmem = change_address (srcmem, mode, y_addr);
19818 /* When unrolling for chips that reorder memory reads and writes,
19819 we can save registers by using a single temporary.
19820 Also, using 4 temporaries is overkill in 32bit mode. */
19821 if (!TARGET_64BIT && 0)
19823 for (i = 0; i < unroll; i++)
19825 if (i)
19827 destmem =
19828 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19829 srcmem =
19830 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19832 emit_move_insn (destmem, srcmem);
19835 else
19837 rtx tmpreg[4];
19838 gcc_assert (unroll <= 4);
19839 for (i = 0; i < unroll; i++)
19841 tmpreg[i] = gen_reg_rtx (mode);
19842 if (i)
19844 srcmem =
19845 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19847 emit_move_insn (tmpreg[i], srcmem);
19849 for (i = 0; i < unroll; i++)
19851 if (i)
19853 destmem =
19854 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19856 emit_move_insn (destmem, tmpreg[i]);
19860 else
19861 for (i = 0; i < unroll; i++)
19863 if (i)
19864 destmem =
19865 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19866 emit_move_insn (destmem, value);
19869 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19870 true, OPTAB_LIB_WIDEN);
19871 if (tmp != iter)
19872 emit_move_insn (iter, tmp);
19874 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19875 true, top_label);
19876 if (expected_size != -1)
19878 expected_size /= GET_MODE_SIZE (mode) * unroll;
19879 if (expected_size == 0)
19880 predict_jump (0);
19881 else if (expected_size > REG_BR_PROB_BASE)
19882 predict_jump (REG_BR_PROB_BASE - 1);
19883 else
19884 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19886 else
19887 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19888 iter = ix86_zero_extend_to_Pmode (iter);
19889 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19890 true, OPTAB_LIB_WIDEN);
19891 if (tmp != destptr)
19892 emit_move_insn (destptr, tmp);
19893 if (srcptr)
19895 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19896 true, OPTAB_LIB_WIDEN);
19897 if (tmp != srcptr)
19898 emit_move_insn (srcptr, tmp);
19900 emit_label (out_label);
19903 /* Output a "rep; mov" instruction.
19904 Arguments have the same meaning as for the previous function. */
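/* Added illustration (not original source): for SImode on a 32-bit target
   this corresponds roughly to

       mov  count/4, %ecx
       rep  movsl

   with %edi/%esi left advanced by 4*%ecx; DESTEXP and SRCEXP describe
   those final pointer values for the RTL.  */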
19905 static void
19906 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19907 rtx destptr, rtx srcptr,
19908 rtx count,
19909 enum machine_mode mode)
19911 rtx destexp;
19912 rtx srcexp;
19913 rtx countreg;
19915 /* If the size is known, it is shorter to use rep movs. */
19916 if (mode == QImode && CONST_INT_P (count)
19917 && !(INTVAL (count) & 3))
19918 mode = SImode;
19920 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19921 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19922 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19923 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19924 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19925 if (mode != QImode)
19927 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19928 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19929 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19930 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19931 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19932 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19934 else
19936 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19937 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19939 if (CONST_INT_P (count))
19941 count = GEN_INT (INTVAL (count)
19942 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19943 destmem = shallow_copy_rtx (destmem);
19944 srcmem = shallow_copy_rtx (srcmem);
19945 set_mem_size (destmem, count);
19946 set_mem_size (srcmem, count);
19948 else
19950 if (MEM_SIZE (destmem))
19951 set_mem_size (destmem, NULL_RTX);
19952 if (MEM_SIZE (srcmem))
19953 set_mem_size (srcmem, NULL_RTX);
19955 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
19956 destexp, srcexp));
19959 /* Output a "rep; stos" instruction.
19960 Arguments have the same meaning as for the previous function. */
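/* Added illustration (not original source): for SImode with VALUE promoted
   into %eax this is roughly

       mov  count/4, %ecx
       rep  stosl

   with %edi left advanced by 4*%ecx (described by DESTEXP in the RTL).  */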
19961 static void
19962 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19963 rtx count, enum machine_mode mode,
19964 rtx orig_value)
19966 rtx destexp;
19967 rtx countreg;
19969 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19970 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19971 value = force_reg (mode, gen_lowpart (mode, value));
19972 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19973 if (mode != QImode)
19975 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19976 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19977 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19979 else
19980 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19981 if (orig_value == const0_rtx && CONST_INT_P (count))
19983 count = GEN_INT (INTVAL (count)
19984 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19985 destmem = shallow_copy_rtx (destmem);
19986 set_mem_size (destmem, count);
19988 else if (MEM_SIZE (destmem))
19989 set_mem_size (destmem, NULL_RTX);
19990 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19993 static void
19994 emit_strmov (rtx destmem, rtx srcmem,
19995 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19997 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19998 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19999 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20002 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
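/* Added example: with a constant remainder of 13 bytes on a 64-bit target
   this emits one DImode, one SImode and one QImode string move
   (8 + 4 + 1 bytes), with no loop and no branches.  */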
20003 static void
20004 expand_movmem_epilogue (rtx destmem, rtx srcmem,
20005 rtx destptr, rtx srcptr, rtx count, int max_size)
20007 rtx src, dest;
20008 if (CONST_INT_P (count))
20010 HOST_WIDE_INT countval = INTVAL (count);
20011 int offset = 0;
20013 if ((countval & 0x10) && max_size > 16)
20015 if (TARGET_64BIT)
20017 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20018 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
20020 else
20021 gcc_unreachable ();
20022 offset += 16;
20024 if ((countval & 0x08) && max_size > 8)
20026 if (TARGET_64BIT)
20027 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20028 else
20030 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20031 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
20033 offset += 8;
20035 if ((countval & 0x04) && max_size > 4)
20037 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20038 offset += 4;
20040 if ((countval & 0x02) && max_size > 2)
20042 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
20043 offset += 2;
20045 if ((countval & 0x01) && max_size > 1)
20047 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
20048 offset += 1;
20050 return;
20052 if (max_size > 8)
20054 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
20055 count, 1, OPTAB_DIRECT);
20056 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
20057 count, QImode, 1, 4);
20058 return;
20061 /* When there are stringops, we can cheaply increase dest and src pointers.
20062 Otherwise we save code size by maintaining an offset (zero is readily
20063 available from the preceding rep operation) and using x86 addressing modes. */
20065 if (TARGET_SINGLE_STRINGOP)
20067 if (max_size > 4)
20069 rtx label = ix86_expand_aligntest (count, 4, true);
20070 src = change_address (srcmem, SImode, srcptr);
20071 dest = change_address (destmem, SImode, destptr);
20072 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20073 emit_label (label);
20074 LABEL_NUSES (label) = 1;
20076 if (max_size > 2)
20078 rtx label = ix86_expand_aligntest (count, 2, true);
20079 src = change_address (srcmem, HImode, srcptr);
20080 dest = change_address (destmem, HImode, destptr);
20081 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20082 emit_label (label);
20083 LABEL_NUSES (label) = 1;
20085 if (max_size > 1)
20087 rtx label = ix86_expand_aligntest (count, 1, true);
20088 src = change_address (srcmem, QImode, srcptr);
20089 dest = change_address (destmem, QImode, destptr);
20090 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20091 emit_label (label);
20092 LABEL_NUSES (label) = 1;
20095 else
20097 rtx offset = force_reg (Pmode, const0_rtx);
20098 rtx tmp;
20100 if (max_size > 4)
20102 rtx label = ix86_expand_aligntest (count, 4, true);
20103 src = change_address (srcmem, SImode, srcptr);
20104 dest = change_address (destmem, SImode, destptr);
20105 emit_move_insn (dest, src);
20106 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20107 true, OPTAB_LIB_WIDEN);
20108 if (tmp != offset)
20109 emit_move_insn (offset, tmp);
20110 emit_label (label);
20111 LABEL_NUSES (label) = 1;
20113 if (max_size > 2)
20115 rtx label = ix86_expand_aligntest (count, 2, true);
20116 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20117 src = change_address (srcmem, HImode, tmp);
20118 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20119 dest = change_address (destmem, HImode, tmp);
20120 emit_move_insn (dest, src);
20121 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20122 true, OPTAB_LIB_WIDEN);
20123 if (tmp != offset)
20124 emit_move_insn (offset, tmp);
20125 emit_label (label);
20126 LABEL_NUSES (label) = 1;
20128 if (max_size > 1)
20130 rtx label = ix86_expand_aligntest (count, 1, true);
20131 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20132 src = change_address (srcmem, QImode, tmp);
20133 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20134 dest = change_address (destmem, QImode, tmp);
20135 emit_move_insn (dest, src);
20136 emit_label (label);
20137 LABEL_NUSES (label) = 1;
20142 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20143 static void
20144 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20145 rtx count, int max_size)
20147 count =
20148 expand_simple_binop (counter_mode (count), AND, count,
20149 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20150 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20151 gen_lowpart (QImode, value), count, QImode,
20152 1, max_size / 2);
20155 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20156 static void
20157 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20159 rtx dest;
20161 if (CONST_INT_P (count))
20163 HOST_WIDE_INT countval = INTVAL (count);
20164 int offset = 0;
20166 if ((countval & 0x10) && max_size > 16)
20168 if (TARGET_64BIT)
20170 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20171 emit_insn (gen_strset (destptr, dest, value));
20172 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20173 emit_insn (gen_strset (destptr, dest, value));
20175 else
20176 gcc_unreachable ();
20177 offset += 16;
20179 if ((countval & 0x08) && max_size > 8)
20181 if (TARGET_64BIT)
20183 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20184 emit_insn (gen_strset (destptr, dest, value));
20186 else
20188 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20189 emit_insn (gen_strset (destptr, dest, value));
20190 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20191 emit_insn (gen_strset (destptr, dest, value));
20193 offset += 8;
20195 if ((countval & 0x04) && max_size > 4)
20197 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20198 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20199 offset += 4;
20201 if ((countval & 0x02) && max_size > 2)
20203 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20204 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20205 offset += 2;
20207 if ((countval & 0x01) && max_size > 1)
20209 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20210 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20211 offset += 1;
20213 return;
20215 if (max_size > 32)
20217 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20218 return;
20220 if (max_size > 16)
20222 rtx label = ix86_expand_aligntest (count, 16, true);
20223 if (TARGET_64BIT)
20225 dest = change_address (destmem, DImode, destptr);
20226 emit_insn (gen_strset (destptr, dest, value));
20227 emit_insn (gen_strset (destptr, dest, value));
20229 else
20231 dest = change_address (destmem, SImode, destptr);
20232 emit_insn (gen_strset (destptr, dest, value));
20233 emit_insn (gen_strset (destptr, dest, value));
20234 emit_insn (gen_strset (destptr, dest, value));
20235 emit_insn (gen_strset (destptr, dest, value));
20237 emit_label (label);
20238 LABEL_NUSES (label) = 1;
20240 if (max_size > 8)
20242 rtx label = ix86_expand_aligntest (count, 8, true);
20243 if (TARGET_64BIT)
20245 dest = change_address (destmem, DImode, destptr);
20246 emit_insn (gen_strset (destptr, dest, value));
20248 else
20250 dest = change_address (destmem, SImode, destptr);
20251 emit_insn (gen_strset (destptr, dest, value));
20252 emit_insn (gen_strset (destptr, dest, value));
20254 emit_label (label);
20255 LABEL_NUSES (label) = 1;
20257 if (max_size > 4)
20259 rtx label = ix86_expand_aligntest (count, 4, true);
20260 dest = change_address (destmem, SImode, destptr);
20261 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20262 emit_label (label);
20263 LABEL_NUSES (label) = 1;
20265 if (max_size > 2)
20267 rtx label = ix86_expand_aligntest (count, 2, true);
20268 dest = change_address (destmem, HImode, destptr);
20269 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20270 emit_label (label);
20271 LABEL_NUSES (label) = 1;
20273 if (max_size > 1)
20275 rtx label = ix86_expand_aligntest (count, 1, true);
20276 dest = change_address (destmem, QImode, destptr);
20277 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20278 emit_label (label);
20279 LABEL_NUSES (label) = 1;
20283 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
20284 to DESIRED_ALIGNMENT. */
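/* Added example: with ALIGN == 1 and DESIRED_ALIGNMENT == 8 this emits
   three conditional copies of 1, 2 and 4 bytes, each guarded by a test of
   the corresponding low bit of DESTPTR and each decrementing COUNT, after
   which DESTPTR is 8-byte aligned.  */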
20285 static void
20286 expand_movmem_prologue (rtx destmem, rtx srcmem,
20287 rtx destptr, rtx srcptr, rtx count,
20288 int align, int desired_alignment)
20290 if (align <= 1 && desired_alignment > 1)
20292 rtx label = ix86_expand_aligntest (destptr, 1, false);
20293 srcmem = change_address (srcmem, QImode, srcptr);
20294 destmem = change_address (destmem, QImode, destptr);
20295 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20296 ix86_adjust_counter (count, 1);
20297 emit_label (label);
20298 LABEL_NUSES (label) = 1;
20300 if (align <= 2 && desired_alignment > 2)
20302 rtx label = ix86_expand_aligntest (destptr, 2, false);
20303 srcmem = change_address (srcmem, HImode, srcptr);
20304 destmem = change_address (destmem, HImode, destptr);
20305 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20306 ix86_adjust_counter (count, 2);
20307 emit_label (label);
20308 LABEL_NUSES (label) = 1;
20310 if (align <= 4 && desired_alignment > 4)
20312 rtx label = ix86_expand_aligntest (destptr, 4, false);
20313 srcmem = change_address (srcmem, SImode, srcptr);
20314 destmem = change_address (destmem, SImode, destptr);
20315 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20316 ix86_adjust_counter (count, 4);
20317 emit_label (label);
20318 LABEL_NUSES (label) = 1;
20320 gcc_assert (desired_alignment <= 8);
20323 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
20324 ALIGN_BYTES is how many bytes need to be copied. */
20325 static rtx
20326 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20327 int desired_align, int align_bytes)
20329 rtx src = *srcp;
20330 rtx src_size, dst_size;
20331 int off = 0;
20332 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20333 if (src_align_bytes >= 0)
20334 src_align_bytes = desired_align - src_align_bytes;
20335 src_size = MEM_SIZE (src);
20336 dst_size = MEM_SIZE (dst);
20337 if (align_bytes & 1)
20339 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20340 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20341 off = 1;
20342 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20344 if (align_bytes & 2)
20346 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20347 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20348 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20349 set_mem_align (dst, 2 * BITS_PER_UNIT);
20350 if (src_align_bytes >= 0
20351 && (src_align_bytes & 1) == (align_bytes & 1)
20352 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20353 set_mem_align (src, 2 * BITS_PER_UNIT);
20354 off = 2;
20355 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20357 if (align_bytes & 4)
20359 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20360 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20361 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20362 set_mem_align (dst, 4 * BITS_PER_UNIT);
20363 if (src_align_bytes >= 0)
20365 unsigned int src_align = 0;
20366 if ((src_align_bytes & 3) == (align_bytes & 3))
20367 src_align = 4;
20368 else if ((src_align_bytes & 1) == (align_bytes & 1))
20369 src_align = 2;
20370 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20371 set_mem_align (src, src_align * BITS_PER_UNIT);
20373 off = 4;
20374 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20376 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20377 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20378 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20379 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20380 if (src_align_bytes >= 0)
20382 unsigned int src_align = 0;
20383 if ((src_align_bytes & 7) == (align_bytes & 7))
20384 src_align = 8;
20385 else if ((src_align_bytes & 3) == (align_bytes & 3))
20386 src_align = 4;
20387 else if ((src_align_bytes & 1) == (align_bytes & 1))
20388 src_align = 2;
20389 if (src_align > (unsigned int) desired_align)
20390 src_align = desired_align;
20391 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20392 set_mem_align (src, src_align * BITS_PER_UNIT);
20394 if (dst_size)
20395 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20396 if (src_size)
20397 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
20398 *srcp = src;
20399 return dst;
20402 /* Store enough into DEST to align DEST, known to be aligned by ALIGN,
20403 to DESIRED_ALIGNMENT. */
20404 static void
20405 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20406 int align, int desired_alignment)
20408 if (align <= 1 && desired_alignment > 1)
20410 rtx label = ix86_expand_aligntest (destptr, 1, false);
20411 destmem = change_address (destmem, QImode, destptr);
20412 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20413 ix86_adjust_counter (count, 1);
20414 emit_label (label);
20415 LABEL_NUSES (label) = 1;
20417 if (align <= 2 && desired_alignment > 2)
20419 rtx label = ix86_expand_aligntest (destptr, 2, false);
20420 destmem = change_address (destmem, HImode, destptr);
20421 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20422 ix86_adjust_counter (count, 2);
20423 emit_label (label);
20424 LABEL_NUSES (label) = 1;
20426 if (align <= 4 && desired_alignment > 4)
20428 rtx label = ix86_expand_aligntest (destptr, 4, false);
20429 destmem = change_address (destmem, SImode, destptr);
20430 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20431 ix86_adjust_counter (count, 4);
20432 emit_label (label);
20433 LABEL_NUSES (label) = 1;
20435 gcc_assert (desired_alignment <= 8);
20438 /* Store enough into DST to align DST, known to be aligned by ALIGN, to
20439 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
20440 static rtx
20441 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20442 int desired_align, int align_bytes)
20444 int off = 0;
20445 rtx dst_size = MEM_SIZE (dst);
20446 if (align_bytes & 1)
20448 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20449 off = 1;
20450 emit_insn (gen_strset (destreg, dst,
20451 gen_lowpart (QImode, value)));
20453 if (align_bytes & 2)
20455 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20456 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20457 set_mem_align (dst, 2 * BITS_PER_UNIT);
20458 off = 2;
20459 emit_insn (gen_strset (destreg, dst,
20460 gen_lowpart (HImode, value)));
20462 if (align_bytes & 4)
20464 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20465 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20466 set_mem_align (dst, 4 * BITS_PER_UNIT);
20467 off = 4;
20468 emit_insn (gen_strset (destreg, dst,
20469 gen_lowpart (SImode, value)));
20471 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20472 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20473 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20474 if (dst_size)
20475 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20476 return dst;
20479 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20480 static enum stringop_alg
20481 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20482 int *dynamic_check)
20484 const struct stringop_algs * algs;
20485 bool optimize_for_speed;
20486 /* Algorithms using the rep prefix want at least edi and ecx;
20487 additionally, memset wants eax and memcpy wants esi. Don't
20488 consider such algorithms if the user has appropriated those
20489 registers for their own purposes. */
20490 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20491 || (memset
20492 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20494 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20495 || (alg != rep_prefix_1_byte \
20496 && alg != rep_prefix_4_byte \
20497 && alg != rep_prefix_8_byte))
20498 const struct processor_costs *cost;
20500 /* Even if the string operation call is cold, we still might spend a lot
20501 of time processing large blocks. */
20502 if (optimize_function_for_size_p (cfun)
20503 || (optimize_insn_for_size_p ()
20504 && expected_size != -1 && expected_size < 256))
20505 optimize_for_speed = false;
20506 else
20507 optimize_for_speed = true;
20509 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20511 *dynamic_check = -1;
20512 if (memset)
20513 algs = &cost->memset[TARGET_64BIT != 0];
20514 else
20515 algs = &cost->memcpy[TARGET_64BIT != 0];
20516 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
20517 return stringop_alg;
20518 /* rep; movq or rep; movl is the smallest variant. */
20519 else if (!optimize_for_speed)
20521 if (!count || (count & 3))
20522 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20523 else
20524 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20526 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
20528 else if (expected_size != -1 && expected_size < 4)
20529 return loop_1_byte;
20530 else if (expected_size != -1)
20532 unsigned int i;
20533 enum stringop_alg alg = libcall;
20534 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20536 /* We get here if the algorithms that were not libcall-based
20537 were rep-prefix based and we are unable to use rep prefixes
20538 based on global register usage. Break out of the loop and
20539 use the heuristic below. */
20540 if (algs->size[i].max == 0)
20541 break;
20542 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20544 enum stringop_alg candidate = algs->size[i].alg;
20546 if (candidate != libcall && ALG_USABLE_P (candidate))
20547 alg = candidate;
20548 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20549 last non-libcall inline algorithm. */
20550 if (TARGET_INLINE_ALL_STRINGOPS)
20552 /* When the current size is best copied by a libcall,
20553 but we are still forced to inline, run the heuristic below
20554 that will pick code for medium-sized blocks. */
20555 if (alg != libcall)
20556 return alg;
20557 break;
20559 else if (ALG_USABLE_P (candidate))
20560 return candidate;
20563 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20565 /* When asked to inline the call anyway, try to pick a meaningful choice.
20566 We look for the maximal size of block that is faster to copy by hand and
20567 take blocks of at most that size, guessing that the average size will
20568 be roughly half of that maximum.
20570 If this turns out to be bad, we might simply specify the preferred
20571 choice in ix86_costs. */
20572 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20573 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20575 int max = -1;
20576 enum stringop_alg alg;
20577 int i;
20578 bool any_alg_usable_p = true;
20580 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20582 enum stringop_alg candidate = algs->size[i].alg;
20583 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20585 if (candidate != libcall && candidate
20586 && ALG_USABLE_P (candidate))
20587 max = algs->size[i].max;
20589 /* If there aren't any usable algorithms, then recursing on
20590 smaller sizes isn't going to find anything. Just return the
20591 simple byte-at-a-time copy loop. */
20592 if (!any_alg_usable_p)
20594 /* Pick something reasonable. */
20595 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20596 *dynamic_check = 128;
20597 return loop_1_byte;
20599 if (max == -1)
20600 max = 4096;
20601 alg = decide_alg (count, max / 2, memset, dynamic_check);
20602 gcc_assert (*dynamic_check == -1);
20603 gcc_assert (alg != libcall);
20604 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20605 *dynamic_check = max;
20606 return alg;
20608 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20609 #undef ALG_USABLE_P
20612 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20613 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20614 static int
20615 decide_alignment (int align,
20616 enum stringop_alg alg,
20617 int expected_size)
20619 int desired_align = 0;
20620 switch (alg)
20622 case no_stringop:
20623 gcc_unreachable ();
20624 case loop:
20625 case unrolled_loop:
20626 desired_align = GET_MODE_SIZE (Pmode);
20627 break;
20628 case rep_prefix_8_byte:
20629 desired_align = 8;
20630 break;
20631 case rep_prefix_4_byte:
20632 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
20633 copying a whole cacheline at once. */
20634 if (TARGET_PENTIUMPRO)
20635 desired_align = 8;
20636 else
20637 desired_align = 4;
20638 break;
20639 case rep_prefix_1_byte:
20640 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
20641 copying a whole cacheline at once. */
20642 if (TARGET_PENTIUMPRO)
20643 desired_align = 8;
20644 else
20645 desired_align = 1;
20646 break;
20647 case loop_1_byte:
20648 desired_align = 1;
20649 break;
20650 case libcall:
20651 return 0;
20654 if (optimize_size)
20655 desired_align = 1;
20656 if (desired_align < align)
20657 desired_align = align;
20658 if (expected_size != -1 && expected_size < 4)
20659 desired_align = align;
20660 return desired_align;
20663 /* Return the smallest power of 2 greater than VAL. */
20664 static int
20665 smallest_pow2_greater_than (int val)
20667 int ret = 1;
20668 while (ret <= val)
20669 ret <<= 1;
20670 return ret;
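/* Added examples: smallest_pow2_greater_than (4) == 8,
   smallest_pow2_greater_than (7) == 8, smallest_pow2_greater_than (0) == 1.  */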
20673 /* Expand string move (memcpy) operation. Use i386 string operations when
20674 profitable. expand_setmem contains similar code. The code depends upon
20675 architecture, block size and alignment, but always has the same
20676 overall structure:
20678 1) Prologue guard: Conditional that jumps up to epilogues for small
20679 blocks that can be handled by epilogue alone. This is faster but
20680 also needed for correctness, since the prologue assumes the block is larger
20681 than the desired alignment.
20683 Optional dynamic check for size and libcall for large
20684 blocks is emitted here too, with -minline-stringops-dynamically.
20686 2) Prologue: copy first few bytes in order to get destination aligned
20687 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
20688 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
20689 We emit either a jump tree on power of two sized blocks, or a byte loop.
20691 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20692 with the specified algorithm.
20694 4) Epilogue: code copying the tail of the block that is too small to be
20695 handled by the main body (or up to the size guarded by the prologue guard). */
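/* Added illustration of the emitted shape for a non-constant COUNT with a
   rep-prefix algorithm and DESIRED_ALIGN > ALIGN (labels invented):

       cmp   $epilogue_size_needed, count
       jb    .Lepilogue                   ; 1) prologue guard
       ...copy a few bytes until dst reaches DESIRED_ALIGN...   ; 2) prologue
       ...rep movs / copy loop on SIZE_NEEDED chunks...         ; 3) main body
   .Lepilogue:
       ...copy the remaining count & (epilogue_size_needed - 1) bytes... ; 4)  */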
20697 bool
20698 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20699 rtx expected_align_exp, rtx expected_size_exp)
20701 rtx destreg;
20702 rtx srcreg;
20703 rtx label = NULL;
20704 rtx tmp;
20705 rtx jump_around_label = NULL;
20706 HOST_WIDE_INT align = 1;
20707 unsigned HOST_WIDE_INT count = 0;
20708 HOST_WIDE_INT expected_size = -1;
20709 int size_needed = 0, epilogue_size_needed;
20710 int desired_align = 0, align_bytes = 0;
20711 enum stringop_alg alg;
20712 int dynamic_check;
20713 bool need_zero_guard = false;
20715 if (CONST_INT_P (align_exp))
20716 align = INTVAL (align_exp);
20717 /* i386 can do misaligned accesses at reasonably increased cost. */
20718 if (CONST_INT_P (expected_align_exp)
20719 && INTVAL (expected_align_exp) > align)
20720 align = INTVAL (expected_align_exp);
20721 /* ALIGN is the minimum of destination and source alignment, but we care here
20722 just about destination alignment. */
20723 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20724 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20726 if (CONST_INT_P (count_exp))
20727 count = expected_size = INTVAL (count_exp);
20728 if (CONST_INT_P (expected_size_exp) && count == 0)
20729 expected_size = INTVAL (expected_size_exp);
20731 /* Make sure we don't need to care about overflow later on. */
20732 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20733 return false;
20735 /* Step 0: Decide on preferred algorithm, desired alignment and
20736 size of chunks to be copied by main loop. */
20738 alg = decide_alg (count, expected_size, false, &dynamic_check);
20739 desired_align = decide_alignment (align, alg, expected_size);
20741 if (!TARGET_ALIGN_STRINGOPS)
20742 align = desired_align;
20744 if (alg == libcall)
20745 return false;
20746 gcc_assert (alg != no_stringop);
20747 if (!count)
20748 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20749 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20750 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20751 switch (alg)
20753 case libcall:
20754 case no_stringop:
20755 gcc_unreachable ();
20756 case loop:
20757 need_zero_guard = true;
20758 size_needed = GET_MODE_SIZE (Pmode);
20759 break;
20760 case unrolled_loop:
20761 need_zero_guard = true;
20762 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20763 break;
20764 case rep_prefix_8_byte:
20765 size_needed = 8;
20766 break;
20767 case rep_prefix_4_byte:
20768 size_needed = 4;
20769 break;
20770 case rep_prefix_1_byte:
20771 size_needed = 1;
20772 break;
20773 case loop_1_byte:
20774 need_zero_guard = true;
20775 size_needed = 1;
20776 break;
20779 epilogue_size_needed = size_needed;
20781 /* Step 1: Prologue guard. */
20783 /* Alignment code needs count to be in register. */
20784 if (CONST_INT_P (count_exp) && desired_align > align)
20786 if (INTVAL (count_exp) > desired_align
20787 && INTVAL (count_exp) > size_needed)
20789 align_bytes
20790 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20791 if (align_bytes <= 0)
20792 align_bytes = 0;
20793 else
20794 align_bytes = desired_align - align_bytes;
20796 if (align_bytes == 0)
20797 count_exp = force_reg (counter_mode (count_exp), count_exp);
20799 gcc_assert (desired_align >= 1 && align >= 1);
20801 /* Ensure that alignment prologue won't copy past end of block. */
20802 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20804 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20805 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20806 Make sure it is a power of 2. */
20807 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20809 if (count)
20811 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20813 /* If main algorithm works on QImode, no epilogue is needed.
20814 For small sizes just don't align anything. */
20815 if (size_needed == 1)
20816 desired_align = align;
20817 else
20818 goto epilogue;
20821 else
20823 label = gen_label_rtx ();
20824 emit_cmp_and_jump_insns (count_exp,
20825 GEN_INT (epilogue_size_needed),
20826 LTU, 0, counter_mode (count_exp), 1, label);
20827 if (expected_size == -1 || expected_size < epilogue_size_needed)
20828 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20829 else
20830 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20834 /* Emit code to decide at runtime whether a library call or inline code
20835 should be used. */
20836 if (dynamic_check != -1)
20838 if (CONST_INT_P (count_exp))
20840 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20842 emit_block_move_via_libcall (dst, src, count_exp, false);
20843 count_exp = const0_rtx;
20844 goto epilogue;
20847 else
20849 rtx hot_label = gen_label_rtx ();
20850 jump_around_label = gen_label_rtx ();
20851 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20852 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20853 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20854 emit_block_move_via_libcall (dst, src, count_exp, false);
20855 emit_jump (jump_around_label);
20856 emit_label (hot_label);
20860 /* Step 2: Alignment prologue. */
20862 if (desired_align > align)
20864 if (align_bytes == 0)
20866 /* Except for the first move in the epilogue, we no longer know
20867 the constant offset in the aliasing info. It doesn't seem worth
20868 the pain to maintain it for the first move, so throw away
20869 the info early. */
20870 src = change_address (src, BLKmode, srcreg);
20871 dst = change_address (dst, BLKmode, destreg);
20872 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20873 desired_align);
20875 else
20877 /* If we know how many bytes need to be stored before dst is
20878 sufficiently aligned, maintain aliasing info accurately. */
20879 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20880 desired_align, align_bytes);
20881 count_exp = plus_constant (count_exp, -align_bytes);
20882 count -= align_bytes;
20884 if (need_zero_guard
20885 && (count < (unsigned HOST_WIDE_INT) size_needed
20886 || (align_bytes == 0
20887 && count < ((unsigned HOST_WIDE_INT) size_needed
20888 + desired_align - align))))
20890 /* It is possible that we copied enough so the main loop will not
20891 execute. */
20892 gcc_assert (size_needed > 1);
20893 if (label == NULL_RTX)
20894 label = gen_label_rtx ();
20895 emit_cmp_and_jump_insns (count_exp,
20896 GEN_INT (size_needed),
20897 LTU, 0, counter_mode (count_exp), 1, label);
20898 if (expected_size == -1
20899 || expected_size < (desired_align - align) / 2 + size_needed)
20900 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20901 else
20902 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20905 if (label && size_needed == 1)
20907 emit_label (label);
20908 LABEL_NUSES (label) = 1;
20909 label = NULL;
20910 epilogue_size_needed = 1;
20912 else if (label == NULL_RTX)
20913 epilogue_size_needed = size_needed;
20915 /* Step 3: Main loop. */
20917 switch (alg)
20919 case libcall:
20920 case no_stringop:
20921 gcc_unreachable ();
20922 case loop_1_byte:
20923 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20924 count_exp, QImode, 1, expected_size);
20925 break;
20926 case loop:
20927 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20928 count_exp, Pmode, 1, expected_size);
20929 break;
20930 case unrolled_loop:
20931 /* Unroll only by a factor of 2 in 32bit mode, since we don't have enough
20932 registers for 4 temporaries anyway. */
20933 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20934 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20935 expected_size);
20936 break;
20937 case rep_prefix_8_byte:
20938 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20939 DImode);
20940 break;
20941 case rep_prefix_4_byte:
20942 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20943 SImode);
20944 break;
20945 case rep_prefix_1_byte:
20946 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20947 QImode);
20948 break;
20950 /* Adjust properly the offset of src and dest memory for aliasing. */
20951 if (CONST_INT_P (count_exp))
20953 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20954 (count / size_needed) * size_needed);
20955 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20956 (count / size_needed) * size_needed);
20958 else
20960 src = change_address (src, BLKmode, srcreg);
20961 dst = change_address (dst, BLKmode, destreg);
20964 /* Step 4: Epilogue to copy the remaining bytes. */
20965 epilogue:
20966 if (label)
20968 /* When the main loop is done, COUNT_EXP might hold the original count,
20969 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20970 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20971 bytes. Compensate if needed. */
20973 if (size_needed < epilogue_size_needed)
20975 tmp =
20976 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20977 GEN_INT (size_needed - 1), count_exp, 1,
20978 OPTAB_DIRECT);
20979 if (tmp != count_exp)
20980 emit_move_insn (count_exp, tmp);
20982 emit_label (label);
20983 LABEL_NUSES (label) = 1;
20986 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20987 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20988 epilogue_size_needed);
20989 if (jump_around_label)
20990 emit_label (jump_around_label);
20991 return true;
20994 /* Helper function for memset. For a QImode value 0xXY produce
20995 0xXYXYXYXY of the width specified by MODE. This is essentially
20996 a multiplication by 0x01010101, but we can do slightly better than
20997 synth_mult by unwinding the sequence by hand on CPUs with
20998 slow multiply. */
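/* Added example: for VAL == 0xAB and SImode the shift/or variant computes
   0xAB -> 0xABAB -> 0xABABABAB, while the multiply variant computes
   0xAB * 0x01010101 directly when the cost model says that is cheaper.  */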
20999 static rtx
21000 promote_duplicated_reg (enum machine_mode mode, rtx val)
21002 enum machine_mode valmode = GET_MODE (val);
21003 rtx tmp;
21004 int nops = mode == DImode ? 3 : 2;
21006 gcc_assert (mode == SImode || mode == DImode);
21007 if (val == const0_rtx)
21008 return copy_to_mode_reg (mode, const0_rtx);
21009 if (CONST_INT_P (val))
21011 HOST_WIDE_INT v = INTVAL (val) & 255;
21013 v |= v << 8;
21014 v |= v << 16;
21015 if (mode == DImode)
21016 v |= (v << 16) << 16;
21017 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
21020 if (valmode == VOIDmode)
21021 valmode = QImode;
21022 if (valmode != QImode)
21023 val = gen_lowpart (QImode, val);
21024 if (mode == QImode)
21025 return val;
21026 if (!TARGET_PARTIAL_REG_STALL)
21027 nops--;
21028 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
21029 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
21030 <= (ix86_cost->shift_const + ix86_cost->add) * nops
21031 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
21033 rtx reg = convert_modes (mode, QImode, val, true);
21034 tmp = promote_duplicated_reg (mode, const1_rtx);
21035 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
21036 OPTAB_DIRECT);
21038 else
21040 rtx reg = convert_modes (mode, QImode, val, true);
21042 if (!TARGET_PARTIAL_REG_STALL)
21043 if (mode == SImode)
21044 emit_insn (gen_movsi_insv_1 (reg, reg));
21045 else
21046 emit_insn (gen_movdi_insv_1 (reg, reg));
21047 else
21049 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
21050 NULL, 1, OPTAB_DIRECT);
21051 reg =
21052 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21054 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
21055 NULL, 1, OPTAB_DIRECT);
21056 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21057 if (mode == SImode)
21058 return reg;
21059 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
21060 NULL, 1, OPTAB_DIRECT);
21061 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21062 return reg;
21066 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
21067 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
21068 getting alignment from ALIGN to DESIRED_ALIGN. */
21069 static rtx
21070 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
21072 rtx promoted_val;
21074 if (TARGET_64BIT
21075 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
21076 promoted_val = promote_duplicated_reg (DImode, val);
21077 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
21078 promoted_val = promote_duplicated_reg (SImode, val);
21079 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21080 promoted_val = promote_duplicated_reg (HImode, val);
21081 else
21082 promoted_val = val;
21084 return promoted_val;
21087 /* Expand string set operation (memset). Use i386 string operations when
21088 profitable. See the ix86_expand_movmem comment for an explanation of the
21089 individual steps performed. */
21090 bool
21091 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
21092 rtx expected_align_exp, rtx expected_size_exp)
21094 rtx destreg;
21095 rtx label = NULL;
21096 rtx tmp;
21097 rtx jump_around_label = NULL;
21098 HOST_WIDE_INT align = 1;
21099 unsigned HOST_WIDE_INT count = 0;
21100 HOST_WIDE_INT expected_size = -1;
21101 int size_needed = 0, epilogue_size_needed;
21102 int desired_align = 0, align_bytes = 0;
21103 enum stringop_alg alg;
21104 rtx promoted_val = NULL;
21105 bool force_loopy_epilogue = false;
21106 int dynamic_check;
21107 bool need_zero_guard = false;
21109 if (CONST_INT_P (align_exp))
21110 align = INTVAL (align_exp);
21111 /* i386 can do misaligned accesses at reasonably increased cost. */
21112 if (CONST_INT_P (expected_align_exp)
21113 && INTVAL (expected_align_exp) > align)
21114 align = INTVAL (expected_align_exp);
21115 if (CONST_INT_P (count_exp))
21116 count = expected_size = INTVAL (count_exp);
21117 if (CONST_INT_P (expected_size_exp) && count == 0)
21118 expected_size = INTVAL (expected_size_exp);
21120 /* Make sure we don't need to care about overflow later on. */
21121 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21122 return false;
21124 /* Step 0: Decide on preferred algorithm, desired alignment and
21125 size of chunks to be copied by main loop. */
21127 alg = decide_alg (count, expected_size, true, &dynamic_check);
21128 desired_align = decide_alignment (align, alg, expected_size);
21130 if (!TARGET_ALIGN_STRINGOPS)
21131 align = desired_align;
21133 if (alg == libcall)
21134 return false;
21135 gcc_assert (alg != no_stringop);
21136 if (!count)
21137 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21138 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21139 switch (alg)
21141 case libcall:
21142 case no_stringop:
21143 gcc_unreachable ();
21144 case loop:
21145 need_zero_guard = true;
21146 size_needed = GET_MODE_SIZE (Pmode);
21147 break;
21148 case unrolled_loop:
21149 need_zero_guard = true;
21150 size_needed = GET_MODE_SIZE (Pmode) * 4;
21151 break;
21152 case rep_prefix_8_byte:
21153 size_needed = 8;
21154 break;
21155 case rep_prefix_4_byte:
21156 size_needed = 4;
21157 break;
21158 case rep_prefix_1_byte:
21159 size_needed = 1;
21160 break;
21161 case loop_1_byte:
21162 need_zero_guard = true;
21163 size_needed = 1;
21164 break;
21166 epilogue_size_needed = size_needed;
21168 /* Step 1: Prologue guard. */
21170 /* Alignment code needs count to be in register. */
21171 if (CONST_INT_P (count_exp) && desired_align > align)
21173 if (INTVAL (count_exp) > desired_align
21174 && INTVAL (count_exp) > size_needed)
21176 align_bytes
21177 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21178 if (align_bytes <= 0)
21179 align_bytes = 0;
21180 else
21181 align_bytes = desired_align - align_bytes;
21183 if (align_bytes == 0)
21185 enum machine_mode mode = SImode;
21186 if (TARGET_64BIT && (count & ~0xffffffff))
21187 mode = DImode;
21188 count_exp = force_reg (mode, count_exp);
21191 /* Do the cheap promotion to allow better CSE across the
21192 main loop and epilogue (i.e. one load of the big constant in
21193 front of all the code). */
21194 if (CONST_INT_P (val_exp))
21195 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21196 desired_align, align);
21197 /* Ensure that alignment prologue won't copy past end of block. */
21198 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21200 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21201 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21202 Make sure it is power of 2. */
21203 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21205 /* To improve performance of small blocks, we jump around the VAL
21206 promoting code. This means that if the promoted VAL is not constant,
21207 we might not use it in the epilogue and have to use the byte
21208 loop variant. */
21209 if (epilogue_size_needed > 2 && !promoted_val)
21210 force_loopy_epilogue = true;
21211 if (count)
21213 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21215 /* If main algorithm works on QImode, no epilogue is needed.
21216 For small sizes just don't align anything. */
21217 if (size_needed == 1)
21218 desired_align = align;
21219 else
21220 goto epilogue;
21223 else
21225 label = gen_label_rtx ();
21226 emit_cmp_and_jump_insns (count_exp,
21227 GEN_INT (epilogue_size_needed),
21228 LTU, 0, counter_mode (count_exp), 1, label);
21229 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21230 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21231 else
21232 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21235 if (dynamic_check != -1)
21237 rtx hot_label = gen_label_rtx ();
21238 jump_around_label = gen_label_rtx ();
21239 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21240 LEU, 0, counter_mode (count_exp), 1, hot_label);
21241 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21242 set_storage_via_libcall (dst, count_exp, val_exp, false);
21243 emit_jump (jump_around_label);
21244 emit_label (hot_label);
21247 /* Step 2: Alignment prologue. */
21249 /* Do the expensive promotion once we branched off the small blocks. */
21250 if (!promoted_val)
21251 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21252 desired_align, align);
21253 gcc_assert (desired_align >= 1 && align >= 1);
21255 if (desired_align > align)
21257 if (align_bytes == 0)
21259 /* Except for the first move in the epilogue, we no longer know
21260 the constant offset in aliasing info. It doesn't seem worth
21261 the pain to maintain it for the first move, so throw away
21262 the info early. */
21263 dst = change_address (dst, BLKmode, destreg);
21264 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21265 desired_align);
21267 else
21269 /* If we know how many bytes need to be stored before dst is
21270 sufficiently aligned, maintain aliasing info accurately. */
21271 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21272 desired_align, align_bytes);
21273 count_exp = plus_constant (count_exp, -align_bytes);
21274 count -= align_bytes;
21276 if (need_zero_guard
21277 && (count < (unsigned HOST_WIDE_INT) size_needed
21278 || (align_bytes == 0
21279 && count < ((unsigned HOST_WIDE_INT) size_needed
21280 + desired_align - align))))
21282 /* It is possible that we copied enough so the main loop will not
21283 execute. */
21284 gcc_assert (size_needed > 1);
21285 if (label == NULL_RTX)
21286 label = gen_label_rtx ();
21287 emit_cmp_and_jump_insns (count_exp,
21288 GEN_INT (size_needed),
21289 LTU, 0, counter_mode (count_exp), 1, label);
21290 if (expected_size == -1
21291 || expected_size < (desired_align - align) / 2 + size_needed)
21292 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21293 else
21294 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21297 if (label && size_needed == 1)
21299 emit_label (label);
21300 LABEL_NUSES (label) = 1;
21301 label = NULL;
21302 promoted_val = val_exp;
21303 epilogue_size_needed = 1;
21305 else if (label == NULL_RTX)
21306 epilogue_size_needed = size_needed;
21308 /* Step 3: Main loop. */
21310 switch (alg)
21312 case libcall:
21313 case no_stringop:
21314 gcc_unreachable ();
21315 case loop_1_byte:
21316 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21317 count_exp, QImode, 1, expected_size);
21318 break;
21319 case loop:
21320 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21321 count_exp, Pmode, 1, expected_size);
21322 break;
21323 case unrolled_loop:
21324 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21325 count_exp, Pmode, 4, expected_size);
21326 break;
21327 case rep_prefix_8_byte:
21328 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21329 DImode, val_exp);
21330 break;
21331 case rep_prefix_4_byte:
21332 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21333 SImode, val_exp);
21334 break;
21335 case rep_prefix_1_byte:
21336 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21337 QImode, val_exp);
21338 break;
21340 /* Properly adjust the offset of the destination memory for aliasing. */
21341 if (CONST_INT_P (count_exp))
21342 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21343 (count / size_needed) * size_needed);
21344 else
21345 dst = change_address (dst, BLKmode, destreg);
21347 /* Step 4: Epilogue to copy the remaining bytes. */
21349 if (label)
21351 /* When the main loop is done, COUNT_EXP might hold the original count,
21352 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21353 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21354 bytes. Compensate if needed. */
21356 if (size_needed < epilogue_size_needed)
21358 tmp =
21359 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21360 GEN_INT (size_needed - 1), count_exp, 1,
21361 OPTAB_DIRECT);
21362 if (tmp != count_exp)
21363 emit_move_insn (count_exp, tmp);
21365 emit_label (label);
21366 LABEL_NUSES (label) = 1;
21368 epilogue:
21369 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21371 if (force_loopy_epilogue)
21372 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21373 epilogue_size_needed);
21374 else
21375 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21376 epilogue_size_needed);
21378 if (jump_around_label)
21379 emit_label (jump_around_label);
21380 return true;
21383 /* Expand the appropriate insns for doing strlen if not just doing
21384 repnz; scasb
21386 out = result, initialized with the start address
21387 align_rtx = alignment of the address.
21388 scratch = scratch register, initialized with the start address when
21389 not aligned, otherwise undefined
21391 This is just the body. It needs the initializations mentioned above and
21392 some address computation at the end. These things are done in i386.md. */
21394 static void
21395 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21397 int align;
21398 rtx tmp;
21399 rtx align_2_label = NULL_RTX;
21400 rtx align_3_label = NULL_RTX;
21401 rtx align_4_label = gen_label_rtx ();
21402 rtx end_0_label = gen_label_rtx ();
21403 rtx mem;
21404 rtx tmpreg = gen_reg_rtx (SImode);
21405 rtx scratch = gen_reg_rtx (SImode);
21406 rtx cmp;
21408 align = 0;
21409 if (CONST_INT_P (align_rtx))
21410 align = INTVAL (align_rtx);
21412 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21414 /* Is there a known alignment and is it less than 4? */
21415 if (align < 4)
21417 rtx scratch1 = gen_reg_rtx (Pmode);
21418 emit_move_insn (scratch1, out);
21419 /* Is there a known alignment and is it not 2? */
21420 if (align != 2)
21422 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21423 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21425 /* Leave just the 3 lower bits. */
21426 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21427 NULL_RTX, 0, OPTAB_WIDEN);
21429 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21430 Pmode, 1, align_4_label);
21431 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21432 Pmode, 1, align_2_label);
21433 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21434 Pmode, 1, align_3_label);
21436 else
21438 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21439 check if it is aligned to a 4-byte boundary. */
21441 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21442 NULL_RTX, 0, OPTAB_WIDEN);
21444 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21445 Pmode, 1, align_4_label);
21448 mem = change_address (src, QImode, out);
21450 /* Now compare the bytes. */
21452 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
21453 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21454 QImode, 1, end_0_label);
21456 /* Increment the address. */
21457 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21459 /* Not needed with an alignment of 2 */
21460 if (align != 2)
21462 emit_label (align_2_label);
21464 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21465 end_0_label);
21467 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21469 emit_label (align_3_label);
21472 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21473 end_0_label);
21475 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21478 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
21479 align this loop; it only makes the program larger and does not help
21480 speed it up. */
21481 emit_label (align_4_label);
21483 mem = change_address (src, SImode, out);
21484 emit_move_insn (scratch, mem);
21485 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21487 /* This formula yields a nonzero result iff one of the bytes is zero.
21488 This saves three branches inside loop and many cycles. */
21490 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21491 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21492 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21493 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21494 gen_int_mode (0x80808080, SImode)));
21495 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21496 align_4_label);
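 /* Worked example of the (x - 0x01010101) & ~x & 0x80808080 test built
    above (assumed values, added for illustration): for x == 0x41424344 no
    byte is zero; x - 0x01010101 == 0x40414243, ~x == 0xbebdbcbb, their AND
    is 0x00010003, and masking with 0x80808080 gives 0, so the loop keeps
    going.  For x == 0x41420044 the borrow out of the zero byte sets its
    high bit: x - 0x01010101 == 0x4040ff43, ~x == 0xbebdffbb, the AND is
    0x0000ff03, and the mask leaves 0x00008000, a nonzero result, so the
    loop exits.  */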
21498 if (TARGET_CMOVE)
21500 rtx reg = gen_reg_rtx (SImode);
21501 rtx reg2 = gen_reg_rtx (Pmode);
21502 emit_move_insn (reg, tmpreg);
21503 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21505 /* If zero is not in the first two bytes, move two bytes forward. */
21506 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21507 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21508 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21509 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21510 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21511 reg,
21512 tmpreg)));
21513 /* Emit lea manually to avoid clobbering of flags. */
21514 emit_insn (gen_rtx_SET (SImode, reg2,
21515 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21517 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21518 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21519 emit_insn (gen_rtx_SET (VOIDmode, out,
21520 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21521 reg2,
21522 out)));
21524 else
21526 rtx end_2_label = gen_label_rtx ();
21527 /* Is zero in the first two bytes? */
21529 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21530 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21531 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21532 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21533 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21534 pc_rtx);
21535 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21536 JUMP_LABEL (tmp) = end_2_label;
21538 /* Not in the first two. Move two bytes forward. */
21539 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21540 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21542 emit_label (end_2_label);
21546 /* Avoid branch in fixing the byte. */
21547 tmpreg = gen_lowpart (QImode, tmpreg);
21548 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21549 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21550 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21551 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21553 emit_label (end_0_label);
21556 /* Expand strlen. */
21558 bool
21559 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21561 rtx addr, scratch1, scratch2, scratch3, scratch4;
21563 /* The generic case of the strlen expander is long. Avoid expanding
21564 it unless TARGET_INLINE_ALL_STRINGOPS. */
21566 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21567 && !TARGET_INLINE_ALL_STRINGOPS
21568 && !optimize_insn_for_size_p ()
21569 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21570 return false;
21572 addr = force_reg (Pmode, XEXP (src, 0));
21573 scratch1 = gen_reg_rtx (Pmode);
21575 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21576 && !optimize_insn_for_size_p ())
21578 /* Well it seems that some optimizer does not combine a call like
21579 foo(strlen(bar), strlen(bar));
21580 when the move and the subtraction are done here. It does calculate
21581 the length just once when these instructions are done inside of
21582 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
21583 often used and I use one fewer register for the lifetime of
21584 output_strlen_unroll() this is better. */
21586 emit_move_insn (out, addr);
21588 ix86_expand_strlensi_unroll_1 (out, src, align);
21590 /* strlensi_unroll_1 returns the address of the zero at the end of
21591 the string, like memchr(), so compute the length by subtracting
21592 the start address. */
21593 emit_insn (ix86_gen_sub3 (out, out, addr));
21595 else
21597 rtx unspec;
21599 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21600 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21601 return false;
21603 scratch2 = gen_reg_rtx (Pmode);
21604 scratch3 = gen_reg_rtx (Pmode);
21605 scratch4 = force_reg (Pmode, constm1_rtx);
21607 emit_move_insn (scratch3, addr);
21608 eoschar = force_reg (QImode, eoschar);
21610 src = replace_equiv_address_nv (src, scratch3);
21612 /* If .md starts supporting :P, this can be done in .md. */
21613 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21614 scratch4), UNSPEC_SCAS);
21615 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21616 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21617 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
21619 return true;
21622 /* For a given symbol (function), construct code to compute the address of
21623 its PLT entry in the large x86-64 PIC model. */
21625 construct_plt_address (rtx symbol)
21627 rtx tmp = gen_reg_rtx (Pmode);
21628 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21630 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21631 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21633 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21634 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21635 return tmp;
21639 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21640 rtx callarg2,
21641 rtx pop, int sibcall)
21643 rtx use = NULL, call;
21645 if (pop == const0_rtx)
21646 pop = NULL;
21647 gcc_assert (!TARGET_64BIT || !pop);
21649 if (TARGET_MACHO && !TARGET_64BIT)
21651 #if TARGET_MACHO
21652 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21653 fnaddr = machopic_indirect_call_target (fnaddr);
21654 #endif
21656 else
21658 /* Static functions and indirect calls don't need the pic register. */
21659 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21660 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21661 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21662 use_reg (&use, pic_offset_table_rtx);
21665 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21667 rtx al = gen_rtx_REG (QImode, AX_REG);
21668 emit_move_insn (al, callarg2);
21669 use_reg (&use, al);
21672 if (ix86_cmodel == CM_LARGE_PIC
21673 && MEM_P (fnaddr)
21674 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21675 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21676 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21677 else if (sibcall
21678 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21679 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21681 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
21682 fnaddr = gen_rtx_MEM (QImode, fnaddr);
21685 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21686 if (retval)
21687 call = gen_rtx_SET (VOIDmode, retval, call);
21688 if (pop)
21690 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21691 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21692 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
21694 if (TARGET_64BIT
21695 && ix86_cfun_abi () == MS_ABI
21696 && (!callarg2 || INTVAL (callarg2) != -2))
21698 /* We need to represent that the SI, DI and XMM6-XMM15 registers are
21699 clobbered by SYSV calls. */
21700 static int clobbered_registers[] = {
21701 XMM6_REG, XMM7_REG, XMM8_REG,
21702 XMM9_REG, XMM10_REG, XMM11_REG,
21703 XMM12_REG, XMM13_REG, XMM14_REG,
21704 XMM15_REG, SI_REG, DI_REG
21706 unsigned int i;
21707 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
21708 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21709 UNSPEC_MS_TO_SYSV_CALL);
21711 vec[0] = call;
21712 vec[1] = unspec;
21713 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21714 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21715 ? TImode : DImode,
21716 gen_rtx_REG
21717 (SSE_REGNO_P (clobbered_registers[i])
21718 ? TImode : DImode,
21719 clobbered_registers[i]));
21721 call = gen_rtx_PARALLEL (VOIDmode,
21722 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
21723 + 2, vec));
21726 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
21727 if (TARGET_VZEROUPPER)
21729 rtx unspec;
21730 int avx256;
21732 if (cfun->machine->callee_pass_avx256_p)
21734 if (cfun->machine->callee_return_avx256_p)
21735 avx256 = callee_return_pass_avx256;
21736 else
21737 avx256 = callee_pass_avx256;
21739 else if (cfun->machine->callee_return_avx256_p)
21740 avx256 = callee_return_avx256;
21741 else
21742 avx256 = call_no_avx256;
21744 if (reload_completed)
21745 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
21746 else
21748 unspec = gen_rtx_UNSPEC (VOIDmode,
21749 gen_rtvec (1, GEN_INT (avx256)),
21750 UNSPEC_CALL_NEEDS_VZEROUPPER);
21751 call = gen_rtx_PARALLEL (VOIDmode,
21752 gen_rtvec (2, call, unspec));
21756 call = emit_call_insn (call);
21757 if (use)
21758 CALL_INSN_FUNCTION_USAGE (call) = use;
21760 return call;
21763 void
21764 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
21766 rtx call = XVECEXP (PATTERN (insn), 0, 0);
21767 emit_insn (gen_avx_vzeroupper (vzeroupper));
21768 emit_call_insn (call);
21771 /* Output the assembly for a call instruction. */
21773 const char *
21774 ix86_output_call_insn (rtx insn, rtx call_op, int addr_op)
21776 bool direct_p = constant_call_address_operand (call_op, Pmode);
21777 bool seh_nop_p = false;
21779 gcc_assert (addr_op == 0 || addr_op == 1);
21781 if (SIBLING_CALL_P (insn))
21783 if (direct_p)
21784 return addr_op ? "jmp\t%P1" : "jmp\t%P0";
21785 /* SEH epilogue detection requires the indirect branch case
21786 to include REX.W. */
21787 else if (TARGET_SEH)
21788 return addr_op ? "rex.W jmp %A1" : "rex.W jmp %A0";
21789 else
21790 return addr_op ? "jmp\t%A1" : "jmp\t%A0";
21793 /* SEH unwinding can require an extra nop to be emitted in several
21794 circumstances. Determine if we have one of those. */
21795 if (TARGET_SEH)
21797 rtx i;
21799 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
21801 /* If we get to another real insn, we don't need the nop. */
21802 if (INSN_P (i))
21803 break;
21805 /* If we get to the epilogue note, prevent a catch region from
21806 being adjacent to the standard epilogue sequence. If non-call
21807 exceptions are enabled, we'll have done this during epilogue emission. */
21808 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
21809 && !flag_non_call_exceptions
21810 && !can_throw_internal (insn))
21812 seh_nop_p = true;
21813 break;
21817 /* If we didn't find a real insn following the call, prevent the
21818 unwinder from looking into the next function. */
21819 if (i == NULL)
21820 seh_nop_p = true;
21823 if (direct_p)
21825 if (seh_nop_p)
21826 return addr_op ? "call\t%P1\n\tnop" : "call\t%P0\n\tnop";
21827 else
21828 return addr_op ? "call\t%P1" : "call\t%P0";
21830 else
21832 if (seh_nop_p)
21833 return addr_op ? "call\t%A1\n\tnop" : "call\t%A0\n\tnop";
21834 else
21835 return addr_op ? "call\t%A1" : "call\t%A0";
21839 /* Clear stack slot assignments remembered from previous functions.
21840 This is called from INIT_EXPANDERS once before RTL is emitted for each
21841 function. */
21843 static struct machine_function *
21844 ix86_init_machine_status (void)
21846 struct machine_function *f;
21848 f = ggc_alloc_cleared_machine_function ();
21849 f->use_fast_prologue_epilogue_nregs = -1;
21850 f->tls_descriptor_call_expanded_p = 0;
21851 f->call_abi = ix86_abi;
21853 return f;
21856 /* Return a MEM corresponding to a stack slot with mode MODE.
21857 Allocate a new slot if necessary.
21859 The RTL for a function can have several slots available: N is
21860 which slot to use. */
21863 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
21865 struct stack_local_entry *s;
21867 gcc_assert (n < MAX_386_STACK_LOCALS);
21869 /* Virtual slot is valid only before vregs are instantiated. */
21870 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
21872 for (s = ix86_stack_locals; s; s = s->next)
21873 if (s->mode == mode && s->n == n)
21874 return copy_rtx (s->rtl);
21876 s = ggc_alloc_stack_local_entry ();
21877 s->n = n;
21878 s->mode = mode;
21879 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21881 s->next = ix86_stack_locals;
21882 ix86_stack_locals = s;
21883 return s->rtl;
21886 /* Construct the SYMBOL_REF for the tls_get_addr function. */
21888 static GTY(()) rtx ix86_tls_symbol;
21890 ix86_tls_get_addr (void)
21893 if (!ix86_tls_symbol)
21895 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
21896 (TARGET_ANY_GNU_TLS
21897 && !TARGET_64BIT)
21898 ? "___tls_get_addr"
21899 : "__tls_get_addr");
21902 return ix86_tls_symbol;
21905 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
21907 static GTY(()) rtx ix86_tls_module_base_symbol;
21909 ix86_tls_module_base (void)
21912 if (!ix86_tls_module_base_symbol)
21914 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
21915 "_TLS_MODULE_BASE_");
21916 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
21917 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
21920 return ix86_tls_module_base_symbol;
21923 /* Calculate the length of the memory address in the instruction
21924 encoding. Does not include the one-byte modrm, opcode, or prefix. */
21927 memory_address_length (rtx addr)
21929 struct ix86_address parts;
21930 rtx base, index, disp;
21931 int len;
21932 int ok;
21934 if (GET_CODE (addr) == PRE_DEC
21935 || GET_CODE (addr) == POST_INC
21936 || GET_CODE (addr) == PRE_MODIFY
21937 || GET_CODE (addr) == POST_MODIFY)
21938 return 0;
21940 ok = ix86_decompose_address (addr, &parts);
21941 gcc_assert (ok);
21943 if (parts.base && GET_CODE (parts.base) == SUBREG)
21944 parts.base = SUBREG_REG (parts.base);
21945 if (parts.index && GET_CODE (parts.index) == SUBREG)
21946 parts.index = SUBREG_REG (parts.index);
21948 base = parts.base;
21949 index = parts.index;
21950 disp = parts.disp;
21951 len = 0;
21953 /* Rule of thumb:
21954 - esp as the base always wants an index,
21955 - ebp as the base always wants a displacement,
21956 - r12 as the base always wants an index,
21957 - r13 as the base always wants a displacement. */
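 /* For illustration (assumed examples, not from the original source):
    (%eax) needs no bytes beyond the modrm byte, so len stays 0;
    (%ebp) must be encoded as 0x0(%ebp) and (%esp) needs a SIB byte, so
    each costs 1 extra byte; 4(%eax,%ebx,2) needs a SIB byte plus a disp8,
    i.e. 2 extra bytes.  */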
21959 /* Register Indirect. */
21960 if (base && !index && !disp)
21962 /* esp (for its index) and ebp (for its displacement) need
21963 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
21964 code. */
21965 if (REG_P (addr)
21966 && (addr == arg_pointer_rtx
21967 || addr == frame_pointer_rtx
21968 || REGNO (addr) == SP_REG
21969 || REGNO (addr) == BP_REG
21970 || REGNO (addr) == R12_REG
21971 || REGNO (addr) == R13_REG))
21972 len = 1;
21975 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
21976 is not disp32, but disp32(%rip), so for a plain disp32 a
21977 SIB byte is needed, unless print_operand_address
21978 optimizes it into disp32(%rip) or (%rip) is implied
21979 by an UNSPEC. */
21980 else if (disp && !base && !index)
21982 len = 4;
21983 if (TARGET_64BIT)
21985 rtx symbol = disp;
21987 if (GET_CODE (disp) == CONST)
21988 symbol = XEXP (disp, 0);
21989 if (GET_CODE (symbol) == PLUS
21990 && CONST_INT_P (XEXP (symbol, 1)))
21991 symbol = XEXP (symbol, 0);
21993 if (GET_CODE (symbol) != LABEL_REF
21994 && (GET_CODE (symbol) != SYMBOL_REF
21995 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21996 && (GET_CODE (symbol) != UNSPEC
21997 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21998 && XINT (symbol, 1) != UNSPEC_PCREL
21999 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
22000 len += 1;
22004 else
22006 /* Find the length of the displacement constant. */
22007 if (disp)
22009 if (base && satisfies_constraint_K (disp))
22010 len = 1;
22011 else
22012 len = 4;
22014 /* ebp always wants a displacement. Similarly r13. */
22015 else if (base && REG_P (base)
22016 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
22017 len = 1;
22019 /* An index requires the two-byte modrm form.... */
22020 if (index
22021 /* ...like esp (or r12), which always wants an index. */
22022 || base == arg_pointer_rtx
22023 || base == frame_pointer_rtx
22024 || (base && REG_P (base)
22025 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
22026 len += 1;
22029 switch (parts.seg)
22031 case SEG_FS:
22032 case SEG_GS:
22033 len += 1;
22034 break;
22035 default:
22036 break;
22039 return len;
22042 /* Compute the default value for the "length_immediate" attribute. When
22043 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
22045 ix86_attr_length_immediate_default (rtx insn, int shortform)
22047 int len = 0;
22048 int i;
22049 extract_insn_cached (insn);
22050 for (i = recog_data.n_operands - 1; i >= 0; --i)
22051 if (CONSTANT_P (recog_data.operand[i]))
22053 enum attr_mode mode = get_attr_mode (insn);
22055 gcc_assert (!len);
22056 if (shortform && CONST_INT_P (recog_data.operand[i]))
22058 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
22059 switch (mode)
22061 case MODE_QI:
22062 len = 1;
22063 continue;
22064 case MODE_HI:
22065 ival = trunc_int_for_mode (ival, HImode);
22066 break;
22067 case MODE_SI:
22068 ival = trunc_int_for_mode (ival, SImode);
22069 break;
22070 default:
22071 break;
22073 if (IN_RANGE (ival, -128, 127))
22075 len = 1;
22076 continue;
22079 switch (mode)
22081 case MODE_QI:
22082 len = 1;
22083 break;
22084 case MODE_HI:
22085 len = 2;
22086 break;
22087 case MODE_SI:
22088 len = 4;
22089 break;
22090 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
22091 case MODE_DI:
22092 len = 4;
22093 break;
22094 default:
22095 fatal_insn ("unknown insn mode", insn);
22098 return len;
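 /* For illustration (assumed examples, not from the original source):
    "addl $5, %eax" with SHORTFORM set gives len == 1, since 5 fits in a
    signed 8-bit immediate; "movq $-1, %rax" gives len == 4, because DImode
    immediates are encoded as 32-bit sign-extended values.  */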
22100 /* Compute default value for "length_address" attribute. */
22102 ix86_attr_length_address_default (rtx insn)
22104 int i;
22106 if (get_attr_type (insn) == TYPE_LEA)
22108 rtx set = PATTERN (insn), addr;
22110 if (GET_CODE (set) == PARALLEL)
22111 set = XVECEXP (set, 0, 0);
22113 gcc_assert (GET_CODE (set) == SET);
22115 addr = SET_SRC (set);
22116 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22118 if (GET_CODE (addr) == ZERO_EXTEND)
22119 addr = XEXP (addr, 0);
22120 if (GET_CODE (addr) == SUBREG)
22121 addr = SUBREG_REG (addr);
22124 return memory_address_length (addr);
22127 extract_insn_cached (insn);
22128 for (i = recog_data.n_operands - 1; i >= 0; --i)
22129 if (MEM_P (recog_data.operand[i]))
22131 constrain_operands_cached (reload_completed);
22132 if (which_alternative != -1)
22134 const char *constraints = recog_data.constraints[i];
22135 int alt = which_alternative;
22137 while (*constraints == '=' || *constraints == '+')
22138 constraints++;
22139 while (alt-- > 0)
22140 while (*constraints++ != ',')
22142 /* Skip ignored operands. */
22143 if (*constraints == 'X')
22144 continue;
22146 return memory_address_length (XEXP (recog_data.operand[i], 0));
22148 return 0;
22151 /* Compute the default value for the "length_vex" attribute. It includes
22152 the 2- or 3-byte VEX prefix and 1 opcode byte. */
22155 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
22156 int has_vex_w)
22158 int i;
22160 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX.W
22161 bit requires the 3-byte VEX prefix. */
22162 if (!has_0f_opcode || has_vex_w)
22163 return 3 + 1;
22165 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
22166 if (!TARGET_64BIT)
22167 return 2 + 1;
22169 extract_insn_cached (insn);
22171 for (i = recog_data.n_operands - 1; i >= 0; --i)
22172 if (REG_P (recog_data.operand[i]))
22174 /* REX.W bit uses 3 byte VEX prefix. */
22175 if (GET_MODE (recog_data.operand[i]) == DImode
22176 && GENERAL_REG_P (recog_data.operand[i]))
22177 return 3 + 1;
22179 else
22181 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22182 if (MEM_P (recog_data.operand[i])
22183 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
22184 return 3 + 1;
22187 return 2 + 1;
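 /* For illustration (assumed examples, not from the original source):
    "vaddps %xmm2, %xmm1, %xmm0" can use the 2-byte VEX prefix, so the
    result is 2 + 1 == 3; an insn that needs VEX.W, or that mentions an
    extended register via REX.X or REX.B in a memory operand, needs the
    3-byte prefix and yields 3 + 1 == 4.  */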
22190 /* Return the maximum number of instructions a cpu can issue. */
22192 static int
22193 ix86_issue_rate (void)
22195 switch (ix86_tune)
22197 case PROCESSOR_PENTIUM:
22198 case PROCESSOR_ATOM:
22199 case PROCESSOR_K6:
22200 return 2;
22202 case PROCESSOR_PENTIUMPRO:
22203 case PROCESSOR_PENTIUM4:
22204 case PROCESSOR_CORE2_32:
22205 case PROCESSOR_CORE2_64:
22206 case PROCESSOR_COREI7_32:
22207 case PROCESSOR_COREI7_64:
22208 case PROCESSOR_ATHLON:
22209 case PROCESSOR_K8:
22210 case PROCESSOR_AMDFAM10:
22211 case PROCESSOR_NOCONA:
22212 case PROCESSOR_GENERIC32:
22213 case PROCESSOR_GENERIC64:
22214 case PROCESSOR_BDVER1:
22215 return 3;
22217 default:
22218 return 1;
22222 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
22223 set by DEP_INSN and nothing else set by DEP_INSN. */
22225 static int
22226 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22228 rtx set, set2;
22230 /* Simplify the test for uninteresting insns. */
22231 if (insn_type != TYPE_SETCC
22232 && insn_type != TYPE_ICMOV
22233 && insn_type != TYPE_FCMOV
22234 && insn_type != TYPE_IBR)
22235 return 0;
22237 if ((set = single_set (dep_insn)) != 0)
22239 set = SET_DEST (set);
22240 set2 = NULL_RTX;
22242 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22243 && XVECLEN (PATTERN (dep_insn), 0) == 2
22244 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22245 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22247 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22248 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22250 else
22251 return 0;
22253 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22254 return 0;
22256 /* This test is true if the dependent insn reads the flags but
22257 not any other potentially set register. */
22258 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22259 return 0;
22261 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22262 return 0;
22264 return 1;
22267 /* Return true iff USE_INSN has a memory address with operands set by
22268 SET_INSN. */
22270 bool
22271 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22273 int i;
22274 extract_insn_cached (use_insn);
22275 for (i = recog_data.n_operands - 1; i >= 0; --i)
22276 if (MEM_P (recog_data.operand[i]))
22278 rtx addr = XEXP (recog_data.operand[i], 0);
22279 return modified_in_p (addr, set_insn) != 0;
22281 return false;
22284 static int
22285 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22287 enum attr_type insn_type, dep_insn_type;
22288 enum attr_memory memory;
22289 rtx set, set2;
22290 int dep_insn_code_number;
22292 /* Anti and output dependencies have zero cost on all CPUs. */
22293 if (REG_NOTE_KIND (link) != 0)
22294 return 0;
22296 dep_insn_code_number = recog_memoized (dep_insn);
22298 /* If we can't recognize the insns, we can't really do anything. */
22299 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22300 return cost;
22302 insn_type = get_attr_type (insn);
22303 dep_insn_type = get_attr_type (dep_insn);
22305 switch (ix86_tune)
22307 case PROCESSOR_PENTIUM:
22308 /* Address Generation Interlock adds a cycle of latency. */
22309 if (insn_type == TYPE_LEA)
22311 rtx addr = PATTERN (insn);
22313 if (GET_CODE (addr) == PARALLEL)
22314 addr = XVECEXP (addr, 0, 0);
22316 gcc_assert (GET_CODE (addr) == SET);
22318 addr = SET_SRC (addr);
22319 if (modified_in_p (addr, dep_insn))
22320 cost += 1;
22322 else if (ix86_agi_dependent (dep_insn, insn))
22323 cost += 1;
22325 /* ??? Compares pair with jump/setcc. */
22326 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22327 cost = 0;
22329 /* Floating point stores require value to be ready one cycle earlier. */
22330 if (insn_type == TYPE_FMOV
22331 && get_attr_memory (insn) == MEMORY_STORE
22332 && !ix86_agi_dependent (dep_insn, insn))
22333 cost += 1;
22334 break;
22336 case PROCESSOR_PENTIUMPRO:
22337 memory = get_attr_memory (insn);
22339 /* INT->FP conversion is expensive. */
22340 if (get_attr_fp_int_src (dep_insn))
22341 cost += 5;
22343 /* There is one cycle extra latency between an FP op and a store. */
22344 if (insn_type == TYPE_FMOV
22345 && (set = single_set (dep_insn)) != NULL_RTX
22346 && (set2 = single_set (insn)) != NULL_RTX
22347 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22348 && MEM_P (SET_DEST (set2)))
22349 cost += 1;
22351 /* Show the ability of the reorder buffer to hide the latency of a load
22352 by executing it in parallel with the previous instruction, in case the
22353 previous instruction is not needed to compute the address. */
22354 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22355 && !ix86_agi_dependent (dep_insn, insn))
22357 /* Claim moves to take one cycle, as the core can issue one load
22358 at a time and the next load can start a cycle later. */
22359 if (dep_insn_type == TYPE_IMOV
22360 || dep_insn_type == TYPE_FMOV)
22361 cost = 1;
22362 else if (cost > 1)
22363 cost--;
22365 break;
22367 case PROCESSOR_K6:
22368 memory = get_attr_memory (insn);
22370 /* The esp dependency is resolved before the instruction is really
22371 finished. */
22372 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22373 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22374 return 1;
22376 /* INT->FP conversion is expensive. */
22377 if (get_attr_fp_int_src (dep_insn))
22378 cost += 5;
22380 /* Show the ability of the reorder buffer to hide the latency of a load
22381 by executing it in parallel with the previous instruction, in case the
22382 previous instruction is not needed to compute the address. */
22383 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22384 && !ix86_agi_dependent (dep_insn, insn))
22386 /* Claim moves to take one cycle, as the core can issue one load
22387 at a time and the next load can start a cycle later. */
22388 if (dep_insn_type == TYPE_IMOV
22389 || dep_insn_type == TYPE_FMOV)
22390 cost = 1;
22391 else if (cost > 2)
22392 cost -= 2;
22393 else
22394 cost = 1;
22396 break;
22398 case PROCESSOR_ATHLON:
22399 case PROCESSOR_K8:
22400 case PROCESSOR_AMDFAM10:
22401 case PROCESSOR_BDVER1:
22402 case PROCESSOR_ATOM:
22403 case PROCESSOR_GENERIC32:
22404 case PROCESSOR_GENERIC64:
22405 memory = get_attr_memory (insn);
22407 /* Show the ability of the reorder buffer to hide the latency of a load
22408 by executing it in parallel with the previous instruction, in case the
22409 previous instruction is not needed to compute the address. */
22410 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22411 && !ix86_agi_dependent (dep_insn, insn))
22413 enum attr_unit unit = get_attr_unit (insn);
22414 int loadcost = 3;
22416 /* Because of the difference between the length of integer and
22417 floating unit pipeline preparation stages, the memory operands
22418 for floating point are cheaper.
22420 ??? For Athlon the difference is most probably 2. */
22421 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22422 loadcost = 3;
22423 else
22424 loadcost = TARGET_ATHLON ? 2 : 0;
22426 if (cost >= loadcost)
22427 cost -= loadcost;
22428 else
22429 cost = 0;
22432 default:
22433 break;
22436 return cost;
22439 /* How many alternative schedules to try. This should be as wide as the
22440 scheduling freedom in the DFA, but no wider. Making this value too
22441 large results in extra work for the scheduler. */
22443 static int
22444 ia32_multipass_dfa_lookahead (void)
22446 switch (ix86_tune)
22448 case PROCESSOR_PENTIUM:
22449 return 2;
22451 case PROCESSOR_PENTIUMPRO:
22452 case PROCESSOR_K6:
22453 return 1;
22455 case PROCESSOR_CORE2_32:
22456 case PROCESSOR_CORE2_64:
22457 case PROCESSOR_COREI7_32:
22458 case PROCESSOR_COREI7_64:
22459 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22460 as the number of instructions that can be executed in a cycle, i.e.,
22461 issue_rate. I wonder why tuning for many CPUs does not do this. */
22462 return ix86_issue_rate ();
22464 default:
22465 return 0;
22471 /* Model decoder of Core 2/i7.
22472 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
22473 track the instruction fetch block boundaries and make sure that long
22474 (9+ bytes) instructions are assigned to D0. */
22476 /* Maximum length of an insn that can be handled by
22477 a secondary decoder unit. '8' for Core 2/i7. */
22478 static int core2i7_secondary_decoder_max_insn_size;
22480 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22481 '16' for Core 2/i7. */
22482 static int core2i7_ifetch_block_size;
22484 /* Maximum number of instructions decoder can handle per cycle.
22485 '6' for Core 2/i7. */
22486 static int core2i7_ifetch_block_max_insns;
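 /* Worked example (assumed values, added for illustration): with the
    Core 2/i7 parameters of 8, 16 and 6 set below, a 9-byte insn can only
    be issued as the first insn of a cycle (it is too long for a secondary
    decoder), and once a cycle has accumulated 16 bytes or 6 insns the
    remaining ready insns are filtered out until the next cycle.  */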
22488 typedef struct ix86_first_cycle_multipass_data_ *
22489 ix86_first_cycle_multipass_data_t;
22490 typedef const struct ix86_first_cycle_multipass_data_ *
22491 const_ix86_first_cycle_multipass_data_t;
22493 /* A variable to store target state across calls to max_issue within
22494 one cycle. */
22495 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22496 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22498 /* Initialize DATA. */
22499 static void
22500 core2i7_first_cycle_multipass_init (void *_data)
22502 ix86_first_cycle_multipass_data_t data
22503 = (ix86_first_cycle_multipass_data_t) _data;
22505 data->ifetch_block_len = 0;
22506 data->ifetch_block_n_insns = 0;
22507 data->ready_try_change = NULL;
22508 data->ready_try_change_size = 0;
22511 /* Advancing the cycle; reset ifetch block counts. */
22512 static void
22513 core2i7_dfa_post_advance_cycle (void)
22515 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22517 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22519 data->ifetch_block_len = 0;
22520 data->ifetch_block_n_insns = 0;
22523 static int min_insn_size (rtx);
22525 /* Filter out insns from ready_try that the core will not be able to issue
22526 on current cycle due to decoder. */
22527 static void
22528 core2i7_first_cycle_multipass_filter_ready_try
22529 (const_ix86_first_cycle_multipass_data_t data,
22530 char *ready_try, int n_ready, bool first_cycle_insn_p)
22532 while (n_ready--)
22534 rtx insn;
22535 int insn_size;
22537 if (ready_try[n_ready])
22538 continue;
22540 insn = get_ready_element (n_ready);
22541 insn_size = min_insn_size (insn);
22543 if (/* If this is too long an insn for a secondary decoder ... */
22544 (!first_cycle_insn_p
22545 && insn_size > core2i7_secondary_decoder_max_insn_size)
22546 /* ... or it would not fit into the ifetch block ... */
22547 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22548 /* ... or the decoder is full already ... */
22549 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22550 /* ... mask the insn out. */
22552 ready_try[n_ready] = 1;
22554 if (data->ready_try_change)
22555 SET_BIT (data->ready_try_change, n_ready);
22560 /* Prepare for a new round of multipass lookahead scheduling. */
22561 static void
22562 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22563 bool first_cycle_insn_p)
22565 ix86_first_cycle_multipass_data_t data
22566 = (ix86_first_cycle_multipass_data_t) _data;
22567 const_ix86_first_cycle_multipass_data_t prev_data
22568 = ix86_first_cycle_multipass_data;
22570 /* Restore the state from the end of the previous round. */
22571 data->ifetch_block_len = prev_data->ifetch_block_len;
22572 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22574 /* Filter instructions that cannot be issued on current cycle due to
22575 decoder restrictions. */
22576 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22577 first_cycle_insn_p);
22580 /* INSN is being issued in current solution. Account for its impact on
22581 the decoder model. */
22582 static void
22583 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22584 rtx insn, const void *_prev_data)
22586 ix86_first_cycle_multipass_data_t data
22587 = (ix86_first_cycle_multipass_data_t) _data;
22588 const_ix86_first_cycle_multipass_data_t prev_data
22589 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22591 int insn_size = min_insn_size (insn);
22593 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22594 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22595 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22596 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22598 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22599 if (!data->ready_try_change)
22601 data->ready_try_change = sbitmap_alloc (n_ready);
22602 data->ready_try_change_size = n_ready;
22604 else if (data->ready_try_change_size < n_ready)
22606 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22607 n_ready, 0);
22608 data->ready_try_change_size = n_ready;
22610 sbitmap_zero (data->ready_try_change);
22612 /* Filter out insns from ready_try that the core will not be able to issue
22613 on current cycle due to decoder. */
22614 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22615 false);
22618 /* Revert the effect on ready_try. */
22619 static void
22620 core2i7_first_cycle_multipass_backtrack (const void *_data,
22621 char *ready_try,
22622 int n_ready ATTRIBUTE_UNUSED)
22624 const_ix86_first_cycle_multipass_data_t data
22625 = (const_ix86_first_cycle_multipass_data_t) _data;
22626 unsigned int i = 0;
22627 sbitmap_iterator sbi;
22629 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22630 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22632 ready_try[i] = 0;
22636 /* Save the result of multipass lookahead scheduling for the next round. */
22637 static void
22638 core2i7_first_cycle_multipass_end (const void *_data)
22640 const_ix86_first_cycle_multipass_data_t data
22641 = (const_ix86_first_cycle_multipass_data_t) _data;
22642 ix86_first_cycle_multipass_data_t next_data
22643 = ix86_first_cycle_multipass_data;
22645 if (data != NULL)
22647 next_data->ifetch_block_len = data->ifetch_block_len;
22648 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22652 /* Deallocate target data. */
22653 static void
22654 core2i7_first_cycle_multipass_fini (void *_data)
22656 ix86_first_cycle_multipass_data_t data
22657 = (ix86_first_cycle_multipass_data_t) _data;
22659 if (data->ready_try_change)
22661 sbitmap_free (data->ready_try_change);
22662 data->ready_try_change = NULL;
22663 data->ready_try_change_size = 0;
22667 /* Prepare for scheduling pass. */
22668 static void
22669 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22670 int verbose ATTRIBUTE_UNUSED,
22671 int max_uid ATTRIBUTE_UNUSED)
22673 /* Install scheduling hooks for current CPU. Some of these hooks are used
22674 in time-critical parts of the scheduler, so we only set them up when
22675 they are actually used. */
22676 switch (ix86_tune)
22678 case PROCESSOR_CORE2_32:
22679 case PROCESSOR_CORE2_64:
22680 case PROCESSOR_COREI7_32:
22681 case PROCESSOR_COREI7_64:
22682 targetm.sched.dfa_post_advance_cycle
22683 = core2i7_dfa_post_advance_cycle;
22684 targetm.sched.first_cycle_multipass_init
22685 = core2i7_first_cycle_multipass_init;
22686 targetm.sched.first_cycle_multipass_begin
22687 = core2i7_first_cycle_multipass_begin;
22688 targetm.sched.first_cycle_multipass_issue
22689 = core2i7_first_cycle_multipass_issue;
22690 targetm.sched.first_cycle_multipass_backtrack
22691 = core2i7_first_cycle_multipass_backtrack;
22692 targetm.sched.first_cycle_multipass_end
22693 = core2i7_first_cycle_multipass_end;
22694 targetm.sched.first_cycle_multipass_fini
22695 = core2i7_first_cycle_multipass_fini;
22697 /* Set decoder parameters. */
22698 core2i7_secondary_decoder_max_insn_size = 8;
22699 core2i7_ifetch_block_size = 16;
22700 core2i7_ifetch_block_max_insns = 6;
22701 break;
22703 default:
22704 targetm.sched.dfa_post_advance_cycle = NULL;
22705 targetm.sched.first_cycle_multipass_init = NULL;
22706 targetm.sched.first_cycle_multipass_begin = NULL;
22707 targetm.sched.first_cycle_multipass_issue = NULL;
22708 targetm.sched.first_cycle_multipass_backtrack = NULL;
22709 targetm.sched.first_cycle_multipass_end = NULL;
22710 targetm.sched.first_cycle_multipass_fini = NULL;
22711 break;
22716 /* Compute the alignment given to a constant that is being placed in memory.
22717 EXP is the constant and ALIGN is the alignment that the object would
22718 ordinarily have.
22719 The value of this function is used instead of that alignment to align
22720 the object. */
22723 ix86_constant_alignment (tree exp, int align)
22725 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22726 || TREE_CODE (exp) == INTEGER_CST)
22728 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22729 return 64;
22730 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22731 return 128;
22733 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22734 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22735 return BITS_PER_WORD;
22737 return align;
22740 /* Compute the alignment for a static variable.
22741 TYPE is the data type, and ALIGN is the alignment that
22742 the object would ordinarily have. The value of this function is used
22743 instead of that alignment to align the object. */
22746 ix86_data_alignment (tree type, int align)
22748 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22750 if (AGGREGATE_TYPE_P (type)
22751 && TYPE_SIZE (type)
22752 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22753 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22754 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22755 && align < max_align)
22756 align = max_align;
22758 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
22759 to a 16-byte boundary. */
22760 if (TARGET_64BIT)
22762 if (AGGREGATE_TYPE_P (type)
22763 && TYPE_SIZE (type)
22764 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22765 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22766 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22767 return 128;
22770 if (TREE_CODE (type) == ARRAY_TYPE)
22772 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22773 return 64;
22774 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22775 return 128;
22777 else if (TREE_CODE (type) == COMPLEX_TYPE)
22780 if (TYPE_MODE (type) == DCmode && align < 64)
22781 return 64;
22782 if ((TYPE_MODE (type) == XCmode
22783 || TYPE_MODE (type) == TCmode) && align < 128)
22784 return 128;
22786 else if ((TREE_CODE (type) == RECORD_TYPE
22787 || TREE_CODE (type) == UNION_TYPE
22788 || TREE_CODE (type) == QUAL_UNION_TYPE)
22789 && TYPE_FIELDS (type))
22791 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22792 return 64;
22793 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22794 return 128;
22796 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22797 || TREE_CODE (type) == INTEGER_TYPE)
22799 if (TYPE_MODE (type) == DFmode && align < 64)
22800 return 64;
22801 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22802 return 128;
22805 return align;
22808 /* Compute the alignment for a local variable or a stack slot. EXP is
22809 the data type or decl itself, MODE is the widest mode available and
22810 ALIGN is the alignment that the object would ordinarily have. The
22811 value of this macro is used instead of that alignment to align the
22812 object. */
22814 unsigned int
22815 ix86_local_alignment (tree exp, enum machine_mode mode,
22816 unsigned int align)
22818 tree type, decl;
22820 if (exp && DECL_P (exp))
22822 type = TREE_TYPE (exp);
22823 decl = exp;
22825 else
22827 type = exp;
22828 decl = NULL;
22831 /* Don't do dynamic stack realignment for long long objects with
22832 -mpreferred-stack-boundary=2. */
22833 if (!TARGET_64BIT
22834 && align == 64
22835 && ix86_preferred_stack_boundary < 64
22836 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
22837 && (!type || !TYPE_USER_ALIGN (type))
22838 && (!decl || !DECL_USER_ALIGN (decl)))
22839 align = 32;
22841 /* If TYPE is NULL, we are allocating a stack slot for caller-save
22842 register in MODE. We will return the largest alignment of XF
22843 and DF. */
22844 if (!type)
22846 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
22847 align = GET_MODE_ALIGNMENT (DFmode);
22848 return align;
22851 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
22852 to a 16-byte boundary. The exact wording is:
22854 An array uses the same alignment as its elements, except that a local or
22855 global array variable of length at least 16 bytes or
22856 a C99 variable-length array variable always has alignment of at least 16 bytes.
22858 This was added to allow use of aligned SSE instructions on arrays. This
22859 rule is meant for static storage (where the compiler cannot do the analysis
22860 by itself). We follow it for automatic variables only when convenient.
22861 We fully control everything in the function being compiled, and functions
22862 from other units cannot rely on the alignment.
22864 Exclude the va_list type. It is the common case of a local array where
22865 we cannot benefit from the alignment. */
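 /* Worked example (assumed values, added for illustration): a local
    "double buf[4]" is 32 bytes, so under the rule quoted above it is given
    128-bit alignment to allow aligned SSE accesses, while a single
    "double x" keeps its natural 64-bit alignment.  */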
22866 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
22867 && TARGET_SSE)
22869 if (AGGREGATE_TYPE_P (type)
22870 && (TYPE_MAIN_VARIANT (type)
22871 != TYPE_MAIN_VARIANT (va_list_type_node))
22872 && TYPE_SIZE (type)
22873 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22874 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
22875 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22876 return 128;
22878 if (TREE_CODE (type) == ARRAY_TYPE)
22880 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22881 return 64;
22882 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22883 return 128;
22885 else if (TREE_CODE (type) == COMPLEX_TYPE)
22887 if (TYPE_MODE (type) == DCmode && align < 64)
22888 return 64;
22889 if ((TYPE_MODE (type) == XCmode
22890 || TYPE_MODE (type) == TCmode) && align < 128)
22891 return 128;
22893 else if ((TREE_CODE (type) == RECORD_TYPE
22894 || TREE_CODE (type) == UNION_TYPE
22895 || TREE_CODE (type) == QUAL_UNION_TYPE)
22896 && TYPE_FIELDS (type))
22898 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22899 return 64;
22900 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22901 return 128;
22903 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22904 || TREE_CODE (type) == INTEGER_TYPE)
22907 if (TYPE_MODE (type) == DFmode && align < 64)
22908 return 64;
22909 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22910 return 128;
22912 return align;
22915 /* Compute the minimum required alignment for dynamic stack realignment
22916 purposes for a local variable, parameter or a stack slot. EXP is
22917 the data type or decl itself, MODE is its mode and ALIGN is the
22918 alignment that the object would ordinarily have. */
22920 unsigned int
22921 ix86_minimum_alignment (tree exp, enum machine_mode mode,
22922 unsigned int align)
22924 tree type, decl;
22926 if (exp && DECL_P (exp))
22928 type = TREE_TYPE (exp);
22929 decl = exp;
22931 else
22933 type = exp;
22934 decl = NULL;
22937 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
22938 return align;
22940 /* Don't do dynamic stack realignment for long long objects with
22941 -mpreferred-stack-boundary=2. */
22942 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
22943 && (!type || !TYPE_USER_ALIGN (type))
22944 && (!decl || !DECL_USER_ALIGN (decl)))
22945 return 32;
22947 return align;
22950 /* Find a location for the static chain incoming to a nested function.
22951 This is a register, unless all free registers are used by arguments. */
22953 static rtx
22954 ix86_static_chain (const_tree fndecl, bool incoming_p)
22956 unsigned regno;
22958 if (!DECL_STATIC_CHAIN (fndecl))
22959 return NULL;
22961 if (TARGET_64BIT)
22963 /* We always use R10 in 64-bit mode. */
22964 regno = R10_REG;
22966 else
22968 tree fntype;
22969 /* By default in 32-bit mode we use ECX to pass the static chain. */
22970 regno = CX_REG;
22972 fntype = TREE_TYPE (fndecl);
22973 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
22975 /* Fastcall functions use ecx/edx for arguments, which leaves
22976 us with EAX for the static chain. */
22977 regno = AX_REG;
22979 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
22981 /* Thiscall functions use ecx for arguments, which leaves
22982 us with EAX for the static chain. */
22983 regno = AX_REG;
22985 else if (ix86_function_regparm (fntype, fndecl) == 3)
22987 /* For regparm 3, we have no free call-clobbered registers in
22988 which to store the static chain. In order to implement this,
22989 we have the trampoline push the static chain to the stack.
22990 However, we can't push a value below the return address when
22991 we call the nested function directly, so we have to use an
22992 alternate entry point. For this we use ESI, and have the
22993 alternate entry point push ESI, so that things appear the
22994 same once we're executing the nested function. */
22995 if (incoming_p)
22997 if (fndecl == current_function_decl)
22998 ix86_static_chain_on_stack = true;
22999 return gen_frame_mem (SImode,
23000 plus_constant (arg_pointer_rtx, -8));
23002 regno = SI_REG;
23006 return gen_rtx_REG (Pmode, regno);
23009 /* Emit RTL insns to initialize the variable parts of a trampoline.
23010 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23011 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23012 to be passed to the target function. */
23014 static void
23015 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
23017 rtx mem, fnaddr;
23019 fnaddr = XEXP (DECL_RTL (fndecl), 0);
23021 if (!TARGET_64BIT)
23023 rtx disp, chain;
23024 int opcode;
23026 /* Depending on the static chain location, either load a register
23027 with a constant, or push the constant to the stack. All of the
23028 instructions are the same size. */
23029 chain = ix86_static_chain (fndecl, true);
23030 if (REG_P (chain))
23032 if (REGNO (chain) == CX_REG)
23033 opcode = 0xb9;
23034 else if (REGNO (chain) == AX_REG)
23035 opcode = 0xb8;
23036 else
23037 gcc_unreachable ();
23039 else
23040 opcode = 0x68;
23042 mem = adjust_address (m_tramp, QImode, 0);
23043 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23045 mem = adjust_address (m_tramp, SImode, 1);
23046 emit_move_insn (mem, chain_value);
23048 /* Compute offset from the end of the jmp to the target function.
23049 In the case in which the trampoline stores the static chain on
23050 the stack, we need to skip the first insn which pushes the
23051 (call-saved) register static chain; this push is 1 byte. */
23052 disp = expand_binop (SImode, sub_optab, fnaddr,
23053 plus_constant (XEXP (m_tramp, 0),
23054 MEM_P (chain) ? 9 : 10),
23055 NULL_RTX, 1, OPTAB_DIRECT);
23057 mem = adjust_address (m_tramp, QImode, 5);
23058 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23060 mem = adjust_address (m_tramp, SImode, 6);
23061 emit_move_insn (mem, disp);
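 /* Illustrative layout of the resulting 32-bit trampoline with a register
    static chain (assumed, added for illustration):
      byte 0     0xb9 or 0xb8      movl $chain, %ecx or %eax
      bytes 1-4  the static chain value
      byte 5     0xe9              jmp rel32
      bytes 6-9  displacement from the end of the jmp to the target.  */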
23063 else
23065 int offset = 0;
23067 /* Load the function address into r11. Try to load the address using
23068 the shorter movl instead of movabs. We may want to support
23069 movq for kernel mode, but the kernel does not use trampolines at
23070 the moment. */
23071 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
23073 fnaddr = copy_to_mode_reg (DImode, fnaddr);
23075 mem = adjust_address (m_tramp, HImode, offset);
23076 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
23078 mem = adjust_address (m_tramp, SImode, offset + 2);
23079 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
23080 offset += 6;
23082 else
23084 mem = adjust_address (m_tramp, HImode, offset);
23085 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
23087 mem = adjust_address (m_tramp, DImode, offset + 2);
23088 emit_move_insn (mem, fnaddr);
23089 offset += 10;
23092 /* Load static chain using movabs to r10. */
23093 mem = adjust_address (m_tramp, HImode, offset);
23094 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
23096 mem = adjust_address (m_tramp, DImode, offset + 2);
23097 emit_move_insn (mem, chain_value);
23098 offset += 10;
23100 /* Jump to r11; the last (unused) byte is a nop, only there to
23101 pad the write out to a single 32-bit store. */
23102 mem = adjust_address (m_tramp, SImode, offset);
23103 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
23104 offset += 4;
23106 gcc_assert (offset <= TRAMPOLINE_SIZE);
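 /* Illustrative layout of the 64-bit trampoline in the movabs case
    (assumed, added for illustration):
      49 bb <imm64>   movabsq $fnaddr, %r11
      49 ba <imm64>   movabsq $chain, %r10
      49 ff e3        jmp *%r11 (with REX prefix)
      90              nop, padding the write to a full 32-bit store.  */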
23109 #ifdef ENABLE_EXECUTE_STACK
23110 #ifdef CHECK_EXECUTE_STACK_ENABLED
23111 if (CHECK_EXECUTE_STACK_ENABLED)
23112 #endif
23113 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23114 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
23115 #endif
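/* Editorial sketch (not part of the original file): for !TARGET_64BIT the
   code above writes a 10-byte trampoline -- one opcode byte (b9 mov
   $chain,%ecx / b8 mov $chain,%eax / 68 push $chain), the 32-bit chain
   value, then e9 plus a 32-bit relative displacement to the target.  For
   TARGET_64BIT, when the movl shortcut is not taken, the result is the
   24-byte sequence

       49 bb <imm64 fnaddr>      movabs $fnaddr, %r11
       49 ba <imm64 chain>       movabs $chain,  %r10
       49 ff e3                  jmp    *%r11
       90                        nop  (pads the last write to 32 bits)

   A minimal stand-alone emitter of those bytes (a hypothetical helper,
   shown only to illustrate the layout; assumes little-endian immediates):  */

static void
example_write_tramp64 (unsigned char *t,
                       unsigned long long fnaddr,
                       unsigned long long chain)
{
  int i;
  t[0] = 0x49; t[1] = 0xbb;                  /* movabs $fnaddr, %r11 */
  for (i = 0; i < 8; i++)
    t[2 + i] = (unsigned char) (fnaddr >> (8 * i));
  t[10] = 0x49; t[11] = 0xba;                /* movabs $chain, %r10 */
  for (i = 0; i < 8; i++)
    t[12 + i] = (unsigned char) (chain >> (8 * i));
  t[20] = 0x49; t[21] = 0xff; t[22] = 0xe3;  /* jmp *%r11 */
  t[23] = 0x90;                              /* nop */
}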
23118 /* The following file contains several enumerations and data structures
23119 built from the definitions in i386-builtin-types.def. */
23121 #include "i386-builtin-types.inc"
23123 /* Table for the ix86 builtin non-function types. */
23124 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23126 /* Retrieve an element from the above table, building some of
23127 the types lazily. */
23129 static tree
23130 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23132 unsigned int index;
23133 tree type, itype;
23135 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23137 type = ix86_builtin_type_tab[(int) tcode];
23138 if (type != NULL)
23139 return type;
23141 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23142 if (tcode <= IX86_BT_LAST_VECT)
23144 enum machine_mode mode;
23146 index = tcode - IX86_BT_LAST_PRIM - 1;
23147 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23148 mode = ix86_builtin_type_vect_mode[index];
23150 type = build_vector_type_for_mode (itype, mode);
23152 else
23154 int quals;
23156 index = tcode - IX86_BT_LAST_VECT - 1;
23157 if (tcode <= IX86_BT_LAST_PTR)
23158 quals = TYPE_UNQUALIFIED;
23159 else
23160 quals = TYPE_QUAL_CONST;
23162 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23163 if (quals != TYPE_UNQUALIFIED)
23164 itype = build_qualified_type (itype, quals);
23166 type = build_pointer_type (itype);
23169 ix86_builtin_type_tab[(int) tcode] = type;
23170 return type;
23173 /* Table for the ix86 builtin function types. */
23174 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23176 /* Retrieve an element from the above table, building some of
23177 the types lazily. */
23179 static tree
23180 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23182 tree type;
23184 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23186 type = ix86_builtin_func_type_tab[(int) tcode];
23187 if (type != NULL)
23188 return type;
23190 if (tcode <= IX86_BT_LAST_FUNC)
23192 unsigned start = ix86_builtin_func_start[(int) tcode];
23193 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23194 tree rtype, atype, args = void_list_node;
23195 unsigned i;
23197 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23198 for (i = after - 1; i > start; --i)
23200 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23201 args = tree_cons (NULL, atype, args);
23204 type = build_function_type (rtype, args);
23206 else
23208 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23209 enum ix86_builtin_func_type icode;
23211 icode = ix86_builtin_func_alias_base[index];
23212 type = ix86_get_builtin_func_type (icode);
23215 ix86_builtin_func_type_tab[(int) tcode] = type;
23216 return type;
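/* Editorial note (not part of the original file): both getters above use
   the same memoized, lazily-built table pattern -- return the cached tree
   if present, otherwise build it once from the index tables generated into
   i386-builtin-types.inc and cache it.  For a non-alias function code such
   as V4SF_FTYPE_V4SF_V4SF the args slice holds the return type first and
   the argument types after it, so the loop above conses the arguments in
   reverse onto void_list_node and ends up with "V4SF (V4SF, V4SF)".  A
   stand-alone sketch of the caching pattern, with a hypothetical
   build_entry callback standing in for the table lookups:  */

static void *example_type_tab[16];

static void *
example_get_type (int tcode, void *(*build_entry) (int))
{
  void *type = example_type_tab[tcode];
  if (type)
    return type;                   /* built by an earlier call */
  type = build_entry (tcode);      /* build lazily on first request */
  example_type_tab[tcode] = type;  /* cache for subsequent requests */
  return type;
}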
23220 /* Codes for all the SSE/MMX builtins. */
23221 enum ix86_builtins
23223 IX86_BUILTIN_ADDPS,
23224 IX86_BUILTIN_ADDSS,
23225 IX86_BUILTIN_DIVPS,
23226 IX86_BUILTIN_DIVSS,
23227 IX86_BUILTIN_MULPS,
23228 IX86_BUILTIN_MULSS,
23229 IX86_BUILTIN_SUBPS,
23230 IX86_BUILTIN_SUBSS,
23232 IX86_BUILTIN_CMPEQPS,
23233 IX86_BUILTIN_CMPLTPS,
23234 IX86_BUILTIN_CMPLEPS,
23235 IX86_BUILTIN_CMPGTPS,
23236 IX86_BUILTIN_CMPGEPS,
23237 IX86_BUILTIN_CMPNEQPS,
23238 IX86_BUILTIN_CMPNLTPS,
23239 IX86_BUILTIN_CMPNLEPS,
23240 IX86_BUILTIN_CMPNGTPS,
23241 IX86_BUILTIN_CMPNGEPS,
23242 IX86_BUILTIN_CMPORDPS,
23243 IX86_BUILTIN_CMPUNORDPS,
23244 IX86_BUILTIN_CMPEQSS,
23245 IX86_BUILTIN_CMPLTSS,
23246 IX86_BUILTIN_CMPLESS,
23247 IX86_BUILTIN_CMPNEQSS,
23248 IX86_BUILTIN_CMPNLTSS,
23249 IX86_BUILTIN_CMPNLESS,
23250 IX86_BUILTIN_CMPNGTSS,
23251 IX86_BUILTIN_CMPNGESS,
23252 IX86_BUILTIN_CMPORDSS,
23253 IX86_BUILTIN_CMPUNORDSS,
23255 IX86_BUILTIN_COMIEQSS,
23256 IX86_BUILTIN_COMILTSS,
23257 IX86_BUILTIN_COMILESS,
23258 IX86_BUILTIN_COMIGTSS,
23259 IX86_BUILTIN_COMIGESS,
23260 IX86_BUILTIN_COMINEQSS,
23261 IX86_BUILTIN_UCOMIEQSS,
23262 IX86_BUILTIN_UCOMILTSS,
23263 IX86_BUILTIN_UCOMILESS,
23264 IX86_BUILTIN_UCOMIGTSS,
23265 IX86_BUILTIN_UCOMIGESS,
23266 IX86_BUILTIN_UCOMINEQSS,
23268 IX86_BUILTIN_CVTPI2PS,
23269 IX86_BUILTIN_CVTPS2PI,
23270 IX86_BUILTIN_CVTSI2SS,
23271 IX86_BUILTIN_CVTSI642SS,
23272 IX86_BUILTIN_CVTSS2SI,
23273 IX86_BUILTIN_CVTSS2SI64,
23274 IX86_BUILTIN_CVTTPS2PI,
23275 IX86_BUILTIN_CVTTSS2SI,
23276 IX86_BUILTIN_CVTTSS2SI64,
23278 IX86_BUILTIN_MAXPS,
23279 IX86_BUILTIN_MAXSS,
23280 IX86_BUILTIN_MINPS,
23281 IX86_BUILTIN_MINSS,
23283 IX86_BUILTIN_LOADUPS,
23284 IX86_BUILTIN_STOREUPS,
23285 IX86_BUILTIN_MOVSS,
23287 IX86_BUILTIN_MOVHLPS,
23288 IX86_BUILTIN_MOVLHPS,
23289 IX86_BUILTIN_LOADHPS,
23290 IX86_BUILTIN_LOADLPS,
23291 IX86_BUILTIN_STOREHPS,
23292 IX86_BUILTIN_STORELPS,
23294 IX86_BUILTIN_MASKMOVQ,
23295 IX86_BUILTIN_MOVMSKPS,
23296 IX86_BUILTIN_PMOVMSKB,
23298 IX86_BUILTIN_MOVNTPS,
23299 IX86_BUILTIN_MOVNTQ,
23301 IX86_BUILTIN_LOADDQU,
23302 IX86_BUILTIN_STOREDQU,
23304 IX86_BUILTIN_PACKSSWB,
23305 IX86_BUILTIN_PACKSSDW,
23306 IX86_BUILTIN_PACKUSWB,
23308 IX86_BUILTIN_PADDB,
23309 IX86_BUILTIN_PADDW,
23310 IX86_BUILTIN_PADDD,
23311 IX86_BUILTIN_PADDQ,
23312 IX86_BUILTIN_PADDSB,
23313 IX86_BUILTIN_PADDSW,
23314 IX86_BUILTIN_PADDUSB,
23315 IX86_BUILTIN_PADDUSW,
23316 IX86_BUILTIN_PSUBB,
23317 IX86_BUILTIN_PSUBW,
23318 IX86_BUILTIN_PSUBD,
23319 IX86_BUILTIN_PSUBQ,
23320 IX86_BUILTIN_PSUBSB,
23321 IX86_BUILTIN_PSUBSW,
23322 IX86_BUILTIN_PSUBUSB,
23323 IX86_BUILTIN_PSUBUSW,
23325 IX86_BUILTIN_PAND,
23326 IX86_BUILTIN_PANDN,
23327 IX86_BUILTIN_POR,
23328 IX86_BUILTIN_PXOR,
23330 IX86_BUILTIN_PAVGB,
23331 IX86_BUILTIN_PAVGW,
23333 IX86_BUILTIN_PCMPEQB,
23334 IX86_BUILTIN_PCMPEQW,
23335 IX86_BUILTIN_PCMPEQD,
23336 IX86_BUILTIN_PCMPGTB,
23337 IX86_BUILTIN_PCMPGTW,
23338 IX86_BUILTIN_PCMPGTD,
23340 IX86_BUILTIN_PMADDWD,
23342 IX86_BUILTIN_PMAXSW,
23343 IX86_BUILTIN_PMAXUB,
23344 IX86_BUILTIN_PMINSW,
23345 IX86_BUILTIN_PMINUB,
23347 IX86_BUILTIN_PMULHUW,
23348 IX86_BUILTIN_PMULHW,
23349 IX86_BUILTIN_PMULLW,
23351 IX86_BUILTIN_PSADBW,
23352 IX86_BUILTIN_PSHUFW,
23354 IX86_BUILTIN_PSLLW,
23355 IX86_BUILTIN_PSLLD,
23356 IX86_BUILTIN_PSLLQ,
23357 IX86_BUILTIN_PSRAW,
23358 IX86_BUILTIN_PSRAD,
23359 IX86_BUILTIN_PSRLW,
23360 IX86_BUILTIN_PSRLD,
23361 IX86_BUILTIN_PSRLQ,
23362 IX86_BUILTIN_PSLLWI,
23363 IX86_BUILTIN_PSLLDI,
23364 IX86_BUILTIN_PSLLQI,
23365 IX86_BUILTIN_PSRAWI,
23366 IX86_BUILTIN_PSRADI,
23367 IX86_BUILTIN_PSRLWI,
23368 IX86_BUILTIN_PSRLDI,
23369 IX86_BUILTIN_PSRLQI,
23371 IX86_BUILTIN_PUNPCKHBW,
23372 IX86_BUILTIN_PUNPCKHWD,
23373 IX86_BUILTIN_PUNPCKHDQ,
23374 IX86_BUILTIN_PUNPCKLBW,
23375 IX86_BUILTIN_PUNPCKLWD,
23376 IX86_BUILTIN_PUNPCKLDQ,
23378 IX86_BUILTIN_SHUFPS,
23380 IX86_BUILTIN_RCPPS,
23381 IX86_BUILTIN_RCPSS,
23382 IX86_BUILTIN_RSQRTPS,
23383 IX86_BUILTIN_RSQRTPS_NR,
23384 IX86_BUILTIN_RSQRTSS,
23385 IX86_BUILTIN_RSQRTF,
23386 IX86_BUILTIN_SQRTPS,
23387 IX86_BUILTIN_SQRTPS_NR,
23388 IX86_BUILTIN_SQRTSS,
23390 IX86_BUILTIN_UNPCKHPS,
23391 IX86_BUILTIN_UNPCKLPS,
23393 IX86_BUILTIN_ANDPS,
23394 IX86_BUILTIN_ANDNPS,
23395 IX86_BUILTIN_ORPS,
23396 IX86_BUILTIN_XORPS,
23398 IX86_BUILTIN_EMMS,
23399 IX86_BUILTIN_LDMXCSR,
23400 IX86_BUILTIN_STMXCSR,
23401 IX86_BUILTIN_SFENCE,
23403 /* 3DNow! Original */
23404 IX86_BUILTIN_FEMMS,
23405 IX86_BUILTIN_PAVGUSB,
23406 IX86_BUILTIN_PF2ID,
23407 IX86_BUILTIN_PFACC,
23408 IX86_BUILTIN_PFADD,
23409 IX86_BUILTIN_PFCMPEQ,
23410 IX86_BUILTIN_PFCMPGE,
23411 IX86_BUILTIN_PFCMPGT,
23412 IX86_BUILTIN_PFMAX,
23413 IX86_BUILTIN_PFMIN,
23414 IX86_BUILTIN_PFMUL,
23415 IX86_BUILTIN_PFRCP,
23416 IX86_BUILTIN_PFRCPIT1,
23417 IX86_BUILTIN_PFRCPIT2,
23418 IX86_BUILTIN_PFRSQIT1,
23419 IX86_BUILTIN_PFRSQRT,
23420 IX86_BUILTIN_PFSUB,
23421 IX86_BUILTIN_PFSUBR,
23422 IX86_BUILTIN_PI2FD,
23423 IX86_BUILTIN_PMULHRW,
23425 /* 3DNow! Athlon Extensions */
23426 IX86_BUILTIN_PF2IW,
23427 IX86_BUILTIN_PFNACC,
23428 IX86_BUILTIN_PFPNACC,
23429 IX86_BUILTIN_PI2FW,
23430 IX86_BUILTIN_PSWAPDSI,
23431 IX86_BUILTIN_PSWAPDSF,
23433 /* SSE2 */
23434 IX86_BUILTIN_ADDPD,
23435 IX86_BUILTIN_ADDSD,
23436 IX86_BUILTIN_DIVPD,
23437 IX86_BUILTIN_DIVSD,
23438 IX86_BUILTIN_MULPD,
23439 IX86_BUILTIN_MULSD,
23440 IX86_BUILTIN_SUBPD,
23441 IX86_BUILTIN_SUBSD,
23443 IX86_BUILTIN_CMPEQPD,
23444 IX86_BUILTIN_CMPLTPD,
23445 IX86_BUILTIN_CMPLEPD,
23446 IX86_BUILTIN_CMPGTPD,
23447 IX86_BUILTIN_CMPGEPD,
23448 IX86_BUILTIN_CMPNEQPD,
23449 IX86_BUILTIN_CMPNLTPD,
23450 IX86_BUILTIN_CMPNLEPD,
23451 IX86_BUILTIN_CMPNGTPD,
23452 IX86_BUILTIN_CMPNGEPD,
23453 IX86_BUILTIN_CMPORDPD,
23454 IX86_BUILTIN_CMPUNORDPD,
23455 IX86_BUILTIN_CMPEQSD,
23456 IX86_BUILTIN_CMPLTSD,
23457 IX86_BUILTIN_CMPLESD,
23458 IX86_BUILTIN_CMPNEQSD,
23459 IX86_BUILTIN_CMPNLTSD,
23460 IX86_BUILTIN_CMPNLESD,
23461 IX86_BUILTIN_CMPORDSD,
23462 IX86_BUILTIN_CMPUNORDSD,
23464 IX86_BUILTIN_COMIEQSD,
23465 IX86_BUILTIN_COMILTSD,
23466 IX86_BUILTIN_COMILESD,
23467 IX86_BUILTIN_COMIGTSD,
23468 IX86_BUILTIN_COMIGESD,
23469 IX86_BUILTIN_COMINEQSD,
23470 IX86_BUILTIN_UCOMIEQSD,
23471 IX86_BUILTIN_UCOMILTSD,
23472 IX86_BUILTIN_UCOMILESD,
23473 IX86_BUILTIN_UCOMIGTSD,
23474 IX86_BUILTIN_UCOMIGESD,
23475 IX86_BUILTIN_UCOMINEQSD,
23477 IX86_BUILTIN_MAXPD,
23478 IX86_BUILTIN_MAXSD,
23479 IX86_BUILTIN_MINPD,
23480 IX86_BUILTIN_MINSD,
23482 IX86_BUILTIN_ANDPD,
23483 IX86_BUILTIN_ANDNPD,
23484 IX86_BUILTIN_ORPD,
23485 IX86_BUILTIN_XORPD,
23487 IX86_BUILTIN_SQRTPD,
23488 IX86_BUILTIN_SQRTSD,
23490 IX86_BUILTIN_UNPCKHPD,
23491 IX86_BUILTIN_UNPCKLPD,
23493 IX86_BUILTIN_SHUFPD,
23495 IX86_BUILTIN_LOADUPD,
23496 IX86_BUILTIN_STOREUPD,
23497 IX86_BUILTIN_MOVSD,
23499 IX86_BUILTIN_LOADHPD,
23500 IX86_BUILTIN_LOADLPD,
23502 IX86_BUILTIN_CVTDQ2PD,
23503 IX86_BUILTIN_CVTDQ2PS,
23505 IX86_BUILTIN_CVTPD2DQ,
23506 IX86_BUILTIN_CVTPD2PI,
23507 IX86_BUILTIN_CVTPD2PS,
23508 IX86_BUILTIN_CVTTPD2DQ,
23509 IX86_BUILTIN_CVTTPD2PI,
23511 IX86_BUILTIN_CVTPI2PD,
23512 IX86_BUILTIN_CVTSI2SD,
23513 IX86_BUILTIN_CVTSI642SD,
23515 IX86_BUILTIN_CVTSD2SI,
23516 IX86_BUILTIN_CVTSD2SI64,
23517 IX86_BUILTIN_CVTSD2SS,
23518 IX86_BUILTIN_CVTSS2SD,
23519 IX86_BUILTIN_CVTTSD2SI,
23520 IX86_BUILTIN_CVTTSD2SI64,
23522 IX86_BUILTIN_CVTPS2DQ,
23523 IX86_BUILTIN_CVTPS2PD,
23524 IX86_BUILTIN_CVTTPS2DQ,
23526 IX86_BUILTIN_MOVNTI,
23527 IX86_BUILTIN_MOVNTPD,
23528 IX86_BUILTIN_MOVNTDQ,
23530 IX86_BUILTIN_MOVQ128,
23532 /* SSE2 MMX */
23533 IX86_BUILTIN_MASKMOVDQU,
23534 IX86_BUILTIN_MOVMSKPD,
23535 IX86_BUILTIN_PMOVMSKB128,
23537 IX86_BUILTIN_PACKSSWB128,
23538 IX86_BUILTIN_PACKSSDW128,
23539 IX86_BUILTIN_PACKUSWB128,
23541 IX86_BUILTIN_PADDB128,
23542 IX86_BUILTIN_PADDW128,
23543 IX86_BUILTIN_PADDD128,
23544 IX86_BUILTIN_PADDQ128,
23545 IX86_BUILTIN_PADDSB128,
23546 IX86_BUILTIN_PADDSW128,
23547 IX86_BUILTIN_PADDUSB128,
23548 IX86_BUILTIN_PADDUSW128,
23549 IX86_BUILTIN_PSUBB128,
23550 IX86_BUILTIN_PSUBW128,
23551 IX86_BUILTIN_PSUBD128,
23552 IX86_BUILTIN_PSUBQ128,
23553 IX86_BUILTIN_PSUBSB128,
23554 IX86_BUILTIN_PSUBSW128,
23555 IX86_BUILTIN_PSUBUSB128,
23556 IX86_BUILTIN_PSUBUSW128,
23558 IX86_BUILTIN_PAND128,
23559 IX86_BUILTIN_PANDN128,
23560 IX86_BUILTIN_POR128,
23561 IX86_BUILTIN_PXOR128,
23563 IX86_BUILTIN_PAVGB128,
23564 IX86_BUILTIN_PAVGW128,
23566 IX86_BUILTIN_PCMPEQB128,
23567 IX86_BUILTIN_PCMPEQW128,
23568 IX86_BUILTIN_PCMPEQD128,
23569 IX86_BUILTIN_PCMPGTB128,
23570 IX86_BUILTIN_PCMPGTW128,
23571 IX86_BUILTIN_PCMPGTD128,
23573 IX86_BUILTIN_PMADDWD128,
23575 IX86_BUILTIN_PMAXSW128,
23576 IX86_BUILTIN_PMAXUB128,
23577 IX86_BUILTIN_PMINSW128,
23578 IX86_BUILTIN_PMINUB128,
23580 IX86_BUILTIN_PMULUDQ,
23581 IX86_BUILTIN_PMULUDQ128,
23582 IX86_BUILTIN_PMULHUW128,
23583 IX86_BUILTIN_PMULHW128,
23584 IX86_BUILTIN_PMULLW128,
23586 IX86_BUILTIN_PSADBW128,
23587 IX86_BUILTIN_PSHUFHW,
23588 IX86_BUILTIN_PSHUFLW,
23589 IX86_BUILTIN_PSHUFD,
23591 IX86_BUILTIN_PSLLDQI128,
23592 IX86_BUILTIN_PSLLWI128,
23593 IX86_BUILTIN_PSLLDI128,
23594 IX86_BUILTIN_PSLLQI128,
23595 IX86_BUILTIN_PSRAWI128,
23596 IX86_BUILTIN_PSRADI128,
23597 IX86_BUILTIN_PSRLDQI128,
23598 IX86_BUILTIN_PSRLWI128,
23599 IX86_BUILTIN_PSRLDI128,
23600 IX86_BUILTIN_PSRLQI128,
23602 IX86_BUILTIN_PSLLDQ128,
23603 IX86_BUILTIN_PSLLW128,
23604 IX86_BUILTIN_PSLLD128,
23605 IX86_BUILTIN_PSLLQ128,
23606 IX86_BUILTIN_PSRAW128,
23607 IX86_BUILTIN_PSRAD128,
23608 IX86_BUILTIN_PSRLW128,
23609 IX86_BUILTIN_PSRLD128,
23610 IX86_BUILTIN_PSRLQ128,
23612 IX86_BUILTIN_PUNPCKHBW128,
23613 IX86_BUILTIN_PUNPCKHWD128,
23614 IX86_BUILTIN_PUNPCKHDQ128,
23615 IX86_BUILTIN_PUNPCKHQDQ128,
23616 IX86_BUILTIN_PUNPCKLBW128,
23617 IX86_BUILTIN_PUNPCKLWD128,
23618 IX86_BUILTIN_PUNPCKLDQ128,
23619 IX86_BUILTIN_PUNPCKLQDQ128,
23621 IX86_BUILTIN_CLFLUSH,
23622 IX86_BUILTIN_MFENCE,
23623 IX86_BUILTIN_LFENCE,
23625 IX86_BUILTIN_BSRSI,
23626 IX86_BUILTIN_BSRDI,
23627 IX86_BUILTIN_RDPMC,
23628 IX86_BUILTIN_RDTSC,
23629 IX86_BUILTIN_RDTSCP,
23630 IX86_BUILTIN_ROLQI,
23631 IX86_BUILTIN_ROLHI,
23632 IX86_BUILTIN_RORQI,
23633 IX86_BUILTIN_RORHI,
23635 /* SSE3. */
23636 IX86_BUILTIN_ADDSUBPS,
23637 IX86_BUILTIN_HADDPS,
23638 IX86_BUILTIN_HSUBPS,
23639 IX86_BUILTIN_MOVSHDUP,
23640 IX86_BUILTIN_MOVSLDUP,
23641 IX86_BUILTIN_ADDSUBPD,
23642 IX86_BUILTIN_HADDPD,
23643 IX86_BUILTIN_HSUBPD,
23644 IX86_BUILTIN_LDDQU,
23646 IX86_BUILTIN_MONITOR,
23647 IX86_BUILTIN_MWAIT,
23649 /* SSSE3. */
23650 IX86_BUILTIN_PHADDW,
23651 IX86_BUILTIN_PHADDD,
23652 IX86_BUILTIN_PHADDSW,
23653 IX86_BUILTIN_PHSUBW,
23654 IX86_BUILTIN_PHSUBD,
23655 IX86_BUILTIN_PHSUBSW,
23656 IX86_BUILTIN_PMADDUBSW,
23657 IX86_BUILTIN_PMULHRSW,
23658 IX86_BUILTIN_PSHUFB,
23659 IX86_BUILTIN_PSIGNB,
23660 IX86_BUILTIN_PSIGNW,
23661 IX86_BUILTIN_PSIGND,
23662 IX86_BUILTIN_PALIGNR,
23663 IX86_BUILTIN_PABSB,
23664 IX86_BUILTIN_PABSW,
23665 IX86_BUILTIN_PABSD,
23667 IX86_BUILTIN_PHADDW128,
23668 IX86_BUILTIN_PHADDD128,
23669 IX86_BUILTIN_PHADDSW128,
23670 IX86_BUILTIN_PHSUBW128,
23671 IX86_BUILTIN_PHSUBD128,
23672 IX86_BUILTIN_PHSUBSW128,
23673 IX86_BUILTIN_PMADDUBSW128,
23674 IX86_BUILTIN_PMULHRSW128,
23675 IX86_BUILTIN_PSHUFB128,
23676 IX86_BUILTIN_PSIGNB128,
23677 IX86_BUILTIN_PSIGNW128,
23678 IX86_BUILTIN_PSIGND128,
23679 IX86_BUILTIN_PALIGNR128,
23680 IX86_BUILTIN_PABSB128,
23681 IX86_BUILTIN_PABSW128,
23682 IX86_BUILTIN_PABSD128,
23684 /* AMDFAM10 - SSE4A New Instructions. */
23685 IX86_BUILTIN_MOVNTSD,
23686 IX86_BUILTIN_MOVNTSS,
23687 IX86_BUILTIN_EXTRQI,
23688 IX86_BUILTIN_EXTRQ,
23689 IX86_BUILTIN_INSERTQI,
23690 IX86_BUILTIN_INSERTQ,
23692 /* SSE4.1. */
23693 IX86_BUILTIN_BLENDPD,
23694 IX86_BUILTIN_BLENDPS,
23695 IX86_BUILTIN_BLENDVPD,
23696 IX86_BUILTIN_BLENDVPS,
23697 IX86_BUILTIN_PBLENDVB128,
23698 IX86_BUILTIN_PBLENDW128,
23700 IX86_BUILTIN_DPPD,
23701 IX86_BUILTIN_DPPS,
23703 IX86_BUILTIN_INSERTPS128,
23705 IX86_BUILTIN_MOVNTDQA,
23706 IX86_BUILTIN_MPSADBW128,
23707 IX86_BUILTIN_PACKUSDW128,
23708 IX86_BUILTIN_PCMPEQQ,
23709 IX86_BUILTIN_PHMINPOSUW128,
23711 IX86_BUILTIN_PMAXSB128,
23712 IX86_BUILTIN_PMAXSD128,
23713 IX86_BUILTIN_PMAXUD128,
23714 IX86_BUILTIN_PMAXUW128,
23716 IX86_BUILTIN_PMINSB128,
23717 IX86_BUILTIN_PMINSD128,
23718 IX86_BUILTIN_PMINUD128,
23719 IX86_BUILTIN_PMINUW128,
23721 IX86_BUILTIN_PMOVSXBW128,
23722 IX86_BUILTIN_PMOVSXBD128,
23723 IX86_BUILTIN_PMOVSXBQ128,
23724 IX86_BUILTIN_PMOVSXWD128,
23725 IX86_BUILTIN_PMOVSXWQ128,
23726 IX86_BUILTIN_PMOVSXDQ128,
23728 IX86_BUILTIN_PMOVZXBW128,
23729 IX86_BUILTIN_PMOVZXBD128,
23730 IX86_BUILTIN_PMOVZXBQ128,
23731 IX86_BUILTIN_PMOVZXWD128,
23732 IX86_BUILTIN_PMOVZXWQ128,
23733 IX86_BUILTIN_PMOVZXDQ128,
23735 IX86_BUILTIN_PMULDQ128,
23736 IX86_BUILTIN_PMULLD128,
23738 IX86_BUILTIN_ROUNDPD,
23739 IX86_BUILTIN_ROUNDPS,
23740 IX86_BUILTIN_ROUNDSD,
23741 IX86_BUILTIN_ROUNDSS,
23743 IX86_BUILTIN_PTESTZ,
23744 IX86_BUILTIN_PTESTC,
23745 IX86_BUILTIN_PTESTNZC,
23747 IX86_BUILTIN_VEC_INIT_V2SI,
23748 IX86_BUILTIN_VEC_INIT_V4HI,
23749 IX86_BUILTIN_VEC_INIT_V8QI,
23750 IX86_BUILTIN_VEC_EXT_V2DF,
23751 IX86_BUILTIN_VEC_EXT_V2DI,
23752 IX86_BUILTIN_VEC_EXT_V4SF,
23753 IX86_BUILTIN_VEC_EXT_V4SI,
23754 IX86_BUILTIN_VEC_EXT_V8HI,
23755 IX86_BUILTIN_VEC_EXT_V2SI,
23756 IX86_BUILTIN_VEC_EXT_V4HI,
23757 IX86_BUILTIN_VEC_EXT_V16QI,
23758 IX86_BUILTIN_VEC_SET_V2DI,
23759 IX86_BUILTIN_VEC_SET_V4SF,
23760 IX86_BUILTIN_VEC_SET_V4SI,
23761 IX86_BUILTIN_VEC_SET_V8HI,
23762 IX86_BUILTIN_VEC_SET_V4HI,
23763 IX86_BUILTIN_VEC_SET_V16QI,
23765 IX86_BUILTIN_VEC_PACK_SFIX,
23767 /* SSE4.2. */
23768 IX86_BUILTIN_CRC32QI,
23769 IX86_BUILTIN_CRC32HI,
23770 IX86_BUILTIN_CRC32SI,
23771 IX86_BUILTIN_CRC32DI,
23773 IX86_BUILTIN_PCMPESTRI128,
23774 IX86_BUILTIN_PCMPESTRM128,
23775 IX86_BUILTIN_PCMPESTRA128,
23776 IX86_BUILTIN_PCMPESTRC128,
23777 IX86_BUILTIN_PCMPESTRO128,
23778 IX86_BUILTIN_PCMPESTRS128,
23779 IX86_BUILTIN_PCMPESTRZ128,
23780 IX86_BUILTIN_PCMPISTRI128,
23781 IX86_BUILTIN_PCMPISTRM128,
23782 IX86_BUILTIN_PCMPISTRA128,
23783 IX86_BUILTIN_PCMPISTRC128,
23784 IX86_BUILTIN_PCMPISTRO128,
23785 IX86_BUILTIN_PCMPISTRS128,
23786 IX86_BUILTIN_PCMPISTRZ128,
23788 IX86_BUILTIN_PCMPGTQ,
23790 /* AES instructions */
23791 IX86_BUILTIN_AESENC128,
23792 IX86_BUILTIN_AESENCLAST128,
23793 IX86_BUILTIN_AESDEC128,
23794 IX86_BUILTIN_AESDECLAST128,
23795 IX86_BUILTIN_AESIMC128,
23796 IX86_BUILTIN_AESKEYGENASSIST128,
23798 /* PCLMUL instruction */
23799 IX86_BUILTIN_PCLMULQDQ128,
23801 /* AVX */
23802 IX86_BUILTIN_ADDPD256,
23803 IX86_BUILTIN_ADDPS256,
23804 IX86_BUILTIN_ADDSUBPD256,
23805 IX86_BUILTIN_ADDSUBPS256,
23806 IX86_BUILTIN_ANDPD256,
23807 IX86_BUILTIN_ANDPS256,
23808 IX86_BUILTIN_ANDNPD256,
23809 IX86_BUILTIN_ANDNPS256,
23810 IX86_BUILTIN_BLENDPD256,
23811 IX86_BUILTIN_BLENDPS256,
23812 IX86_BUILTIN_BLENDVPD256,
23813 IX86_BUILTIN_BLENDVPS256,
23814 IX86_BUILTIN_DIVPD256,
23815 IX86_BUILTIN_DIVPS256,
23816 IX86_BUILTIN_DPPS256,
23817 IX86_BUILTIN_HADDPD256,
23818 IX86_BUILTIN_HADDPS256,
23819 IX86_BUILTIN_HSUBPD256,
23820 IX86_BUILTIN_HSUBPS256,
23821 IX86_BUILTIN_MAXPD256,
23822 IX86_BUILTIN_MAXPS256,
23823 IX86_BUILTIN_MINPD256,
23824 IX86_BUILTIN_MINPS256,
23825 IX86_BUILTIN_MULPD256,
23826 IX86_BUILTIN_MULPS256,
23827 IX86_BUILTIN_ORPD256,
23828 IX86_BUILTIN_ORPS256,
23829 IX86_BUILTIN_SHUFPD256,
23830 IX86_BUILTIN_SHUFPS256,
23831 IX86_BUILTIN_SUBPD256,
23832 IX86_BUILTIN_SUBPS256,
23833 IX86_BUILTIN_XORPD256,
23834 IX86_BUILTIN_XORPS256,
23835 IX86_BUILTIN_CMPSD,
23836 IX86_BUILTIN_CMPSS,
23837 IX86_BUILTIN_CMPPD,
23838 IX86_BUILTIN_CMPPS,
23839 IX86_BUILTIN_CMPPD256,
23840 IX86_BUILTIN_CMPPS256,
23841 IX86_BUILTIN_CVTDQ2PD256,
23842 IX86_BUILTIN_CVTDQ2PS256,
23843 IX86_BUILTIN_CVTPD2PS256,
23844 IX86_BUILTIN_CVTPS2DQ256,
23845 IX86_BUILTIN_CVTPS2PD256,
23846 IX86_BUILTIN_CVTTPD2DQ256,
23847 IX86_BUILTIN_CVTPD2DQ256,
23848 IX86_BUILTIN_CVTTPS2DQ256,
23849 IX86_BUILTIN_EXTRACTF128PD256,
23850 IX86_BUILTIN_EXTRACTF128PS256,
23851 IX86_BUILTIN_EXTRACTF128SI256,
23852 IX86_BUILTIN_VZEROALL,
23853 IX86_BUILTIN_VZEROUPPER,
23854 IX86_BUILTIN_VPERMILVARPD,
23855 IX86_BUILTIN_VPERMILVARPS,
23856 IX86_BUILTIN_VPERMILVARPD256,
23857 IX86_BUILTIN_VPERMILVARPS256,
23858 IX86_BUILTIN_VPERMILPD,
23859 IX86_BUILTIN_VPERMILPS,
23860 IX86_BUILTIN_VPERMILPD256,
23861 IX86_BUILTIN_VPERMILPS256,
23862 IX86_BUILTIN_VPERMIL2PD,
23863 IX86_BUILTIN_VPERMIL2PS,
23864 IX86_BUILTIN_VPERMIL2PD256,
23865 IX86_BUILTIN_VPERMIL2PS256,
23866 IX86_BUILTIN_VPERM2F128PD256,
23867 IX86_BUILTIN_VPERM2F128PS256,
23868 IX86_BUILTIN_VPERM2F128SI256,
23869 IX86_BUILTIN_VBROADCASTSS,
23870 IX86_BUILTIN_VBROADCASTSD256,
23871 IX86_BUILTIN_VBROADCASTSS256,
23872 IX86_BUILTIN_VBROADCASTPD256,
23873 IX86_BUILTIN_VBROADCASTPS256,
23874 IX86_BUILTIN_VINSERTF128PD256,
23875 IX86_BUILTIN_VINSERTF128PS256,
23876 IX86_BUILTIN_VINSERTF128SI256,
23877 IX86_BUILTIN_LOADUPD256,
23878 IX86_BUILTIN_LOADUPS256,
23879 IX86_BUILTIN_STOREUPD256,
23880 IX86_BUILTIN_STOREUPS256,
23881 IX86_BUILTIN_LDDQU256,
23882 IX86_BUILTIN_MOVNTDQ256,
23883 IX86_BUILTIN_MOVNTPD256,
23884 IX86_BUILTIN_MOVNTPS256,
23885 IX86_BUILTIN_LOADDQU256,
23886 IX86_BUILTIN_STOREDQU256,
23887 IX86_BUILTIN_MASKLOADPD,
23888 IX86_BUILTIN_MASKLOADPS,
23889 IX86_BUILTIN_MASKSTOREPD,
23890 IX86_BUILTIN_MASKSTOREPS,
23891 IX86_BUILTIN_MASKLOADPD256,
23892 IX86_BUILTIN_MASKLOADPS256,
23893 IX86_BUILTIN_MASKSTOREPD256,
23894 IX86_BUILTIN_MASKSTOREPS256,
23895 IX86_BUILTIN_MOVSHDUP256,
23896 IX86_BUILTIN_MOVSLDUP256,
23897 IX86_BUILTIN_MOVDDUP256,
23899 IX86_BUILTIN_SQRTPD256,
23900 IX86_BUILTIN_SQRTPS256,
23901 IX86_BUILTIN_SQRTPS_NR256,
23902 IX86_BUILTIN_RSQRTPS256,
23903 IX86_BUILTIN_RSQRTPS_NR256,
23905 IX86_BUILTIN_RCPPS256,
23907 IX86_BUILTIN_ROUNDPD256,
23908 IX86_BUILTIN_ROUNDPS256,
23910 IX86_BUILTIN_UNPCKHPD256,
23911 IX86_BUILTIN_UNPCKLPD256,
23912 IX86_BUILTIN_UNPCKHPS256,
23913 IX86_BUILTIN_UNPCKLPS256,
23915 IX86_BUILTIN_SI256_SI,
23916 IX86_BUILTIN_PS256_PS,
23917 IX86_BUILTIN_PD256_PD,
23918 IX86_BUILTIN_SI_SI256,
23919 IX86_BUILTIN_PS_PS256,
23920 IX86_BUILTIN_PD_PD256,
23922 IX86_BUILTIN_VTESTZPD,
23923 IX86_BUILTIN_VTESTCPD,
23924 IX86_BUILTIN_VTESTNZCPD,
23925 IX86_BUILTIN_VTESTZPS,
23926 IX86_BUILTIN_VTESTCPS,
23927 IX86_BUILTIN_VTESTNZCPS,
23928 IX86_BUILTIN_VTESTZPD256,
23929 IX86_BUILTIN_VTESTCPD256,
23930 IX86_BUILTIN_VTESTNZCPD256,
23931 IX86_BUILTIN_VTESTZPS256,
23932 IX86_BUILTIN_VTESTCPS256,
23933 IX86_BUILTIN_VTESTNZCPS256,
23934 IX86_BUILTIN_PTESTZ256,
23935 IX86_BUILTIN_PTESTC256,
23936 IX86_BUILTIN_PTESTNZC256,
23938 IX86_BUILTIN_MOVMSKPD256,
23939 IX86_BUILTIN_MOVMSKPS256,
23941 /* TFmode support builtins. */
23942 IX86_BUILTIN_INFQ,
23943 IX86_BUILTIN_HUGE_VALQ,
23944 IX86_BUILTIN_FABSQ,
23945 IX86_BUILTIN_COPYSIGNQ,
23947 /* Vectorizer support builtins. */
23948 IX86_BUILTIN_CPYSGNPS,
23949 IX86_BUILTIN_CPYSGNPD,
23950 IX86_BUILTIN_CPYSGNPS256,
23951 IX86_BUILTIN_CPYSGNPD256,
23953 IX86_BUILTIN_CVTUDQ2PS,
23955 IX86_BUILTIN_VEC_PERM_V2DF,
23956 IX86_BUILTIN_VEC_PERM_V4SF,
23957 IX86_BUILTIN_VEC_PERM_V2DI,
23958 IX86_BUILTIN_VEC_PERM_V4SI,
23959 IX86_BUILTIN_VEC_PERM_V8HI,
23960 IX86_BUILTIN_VEC_PERM_V16QI,
23961 IX86_BUILTIN_VEC_PERM_V2DI_U,
23962 IX86_BUILTIN_VEC_PERM_V4SI_U,
23963 IX86_BUILTIN_VEC_PERM_V8HI_U,
23964 IX86_BUILTIN_VEC_PERM_V16QI_U,
23965 IX86_BUILTIN_VEC_PERM_V4DF,
23966 IX86_BUILTIN_VEC_PERM_V8SF,
23968 /* FMA4 and XOP instructions. */
23969 IX86_BUILTIN_VFMADDSS,
23970 IX86_BUILTIN_VFMADDSD,
23971 IX86_BUILTIN_VFMADDPS,
23972 IX86_BUILTIN_VFMADDPD,
23973 IX86_BUILTIN_VFMADDPS256,
23974 IX86_BUILTIN_VFMADDPD256,
23975 IX86_BUILTIN_VFMADDSUBPS,
23976 IX86_BUILTIN_VFMADDSUBPD,
23977 IX86_BUILTIN_VFMADDSUBPS256,
23978 IX86_BUILTIN_VFMADDSUBPD256,
23980 IX86_BUILTIN_VPCMOV,
23981 IX86_BUILTIN_VPCMOV_V2DI,
23982 IX86_BUILTIN_VPCMOV_V4SI,
23983 IX86_BUILTIN_VPCMOV_V8HI,
23984 IX86_BUILTIN_VPCMOV_V16QI,
23985 IX86_BUILTIN_VPCMOV_V4SF,
23986 IX86_BUILTIN_VPCMOV_V2DF,
23987 IX86_BUILTIN_VPCMOV256,
23988 IX86_BUILTIN_VPCMOV_V4DI256,
23989 IX86_BUILTIN_VPCMOV_V8SI256,
23990 IX86_BUILTIN_VPCMOV_V16HI256,
23991 IX86_BUILTIN_VPCMOV_V32QI256,
23992 IX86_BUILTIN_VPCMOV_V8SF256,
23993 IX86_BUILTIN_VPCMOV_V4DF256,
23995 IX86_BUILTIN_VPPERM,
23997 IX86_BUILTIN_VPMACSSWW,
23998 IX86_BUILTIN_VPMACSWW,
23999 IX86_BUILTIN_VPMACSSWD,
24000 IX86_BUILTIN_VPMACSWD,
24001 IX86_BUILTIN_VPMACSSDD,
24002 IX86_BUILTIN_VPMACSDD,
24003 IX86_BUILTIN_VPMACSSDQL,
24004 IX86_BUILTIN_VPMACSSDQH,
24005 IX86_BUILTIN_VPMACSDQL,
24006 IX86_BUILTIN_VPMACSDQH,
24007 IX86_BUILTIN_VPMADCSSWD,
24008 IX86_BUILTIN_VPMADCSWD,
24010 IX86_BUILTIN_VPHADDBW,
24011 IX86_BUILTIN_VPHADDBD,
24012 IX86_BUILTIN_VPHADDBQ,
24013 IX86_BUILTIN_VPHADDWD,
24014 IX86_BUILTIN_VPHADDWQ,
24015 IX86_BUILTIN_VPHADDDQ,
24016 IX86_BUILTIN_VPHADDUBW,
24017 IX86_BUILTIN_VPHADDUBD,
24018 IX86_BUILTIN_VPHADDUBQ,
24019 IX86_BUILTIN_VPHADDUWD,
24020 IX86_BUILTIN_VPHADDUWQ,
24021 IX86_BUILTIN_VPHADDUDQ,
24022 IX86_BUILTIN_VPHSUBBW,
24023 IX86_BUILTIN_VPHSUBWD,
24024 IX86_BUILTIN_VPHSUBDQ,
24026 IX86_BUILTIN_VPROTB,
24027 IX86_BUILTIN_VPROTW,
24028 IX86_BUILTIN_VPROTD,
24029 IX86_BUILTIN_VPROTQ,
24030 IX86_BUILTIN_VPROTB_IMM,
24031 IX86_BUILTIN_VPROTW_IMM,
24032 IX86_BUILTIN_VPROTD_IMM,
24033 IX86_BUILTIN_VPROTQ_IMM,
24035 IX86_BUILTIN_VPSHLB,
24036 IX86_BUILTIN_VPSHLW,
24037 IX86_BUILTIN_VPSHLD,
24038 IX86_BUILTIN_VPSHLQ,
24039 IX86_BUILTIN_VPSHAB,
24040 IX86_BUILTIN_VPSHAW,
24041 IX86_BUILTIN_VPSHAD,
24042 IX86_BUILTIN_VPSHAQ,
24044 IX86_BUILTIN_VFRCZSS,
24045 IX86_BUILTIN_VFRCZSD,
24046 IX86_BUILTIN_VFRCZPS,
24047 IX86_BUILTIN_VFRCZPD,
24048 IX86_BUILTIN_VFRCZPS256,
24049 IX86_BUILTIN_VFRCZPD256,
24051 IX86_BUILTIN_VPCOMEQUB,
24052 IX86_BUILTIN_VPCOMNEUB,
24053 IX86_BUILTIN_VPCOMLTUB,
24054 IX86_BUILTIN_VPCOMLEUB,
24055 IX86_BUILTIN_VPCOMGTUB,
24056 IX86_BUILTIN_VPCOMGEUB,
24057 IX86_BUILTIN_VPCOMFALSEUB,
24058 IX86_BUILTIN_VPCOMTRUEUB,
24060 IX86_BUILTIN_VPCOMEQUW,
24061 IX86_BUILTIN_VPCOMNEUW,
24062 IX86_BUILTIN_VPCOMLTUW,
24063 IX86_BUILTIN_VPCOMLEUW,
24064 IX86_BUILTIN_VPCOMGTUW,
24065 IX86_BUILTIN_VPCOMGEUW,
24066 IX86_BUILTIN_VPCOMFALSEUW,
24067 IX86_BUILTIN_VPCOMTRUEUW,
24069 IX86_BUILTIN_VPCOMEQUD,
24070 IX86_BUILTIN_VPCOMNEUD,
24071 IX86_BUILTIN_VPCOMLTUD,
24072 IX86_BUILTIN_VPCOMLEUD,
24073 IX86_BUILTIN_VPCOMGTUD,
24074 IX86_BUILTIN_VPCOMGEUD,
24075 IX86_BUILTIN_VPCOMFALSEUD,
24076 IX86_BUILTIN_VPCOMTRUEUD,
24078 IX86_BUILTIN_VPCOMEQUQ,
24079 IX86_BUILTIN_VPCOMNEUQ,
24080 IX86_BUILTIN_VPCOMLTUQ,
24081 IX86_BUILTIN_VPCOMLEUQ,
24082 IX86_BUILTIN_VPCOMGTUQ,
24083 IX86_BUILTIN_VPCOMGEUQ,
24084 IX86_BUILTIN_VPCOMFALSEUQ,
24085 IX86_BUILTIN_VPCOMTRUEUQ,
24087 IX86_BUILTIN_VPCOMEQB,
24088 IX86_BUILTIN_VPCOMNEB,
24089 IX86_BUILTIN_VPCOMLTB,
24090 IX86_BUILTIN_VPCOMLEB,
24091 IX86_BUILTIN_VPCOMGTB,
24092 IX86_BUILTIN_VPCOMGEB,
24093 IX86_BUILTIN_VPCOMFALSEB,
24094 IX86_BUILTIN_VPCOMTRUEB,
24096 IX86_BUILTIN_VPCOMEQW,
24097 IX86_BUILTIN_VPCOMNEW,
24098 IX86_BUILTIN_VPCOMLTW,
24099 IX86_BUILTIN_VPCOMLEW,
24100 IX86_BUILTIN_VPCOMGTW,
24101 IX86_BUILTIN_VPCOMGEW,
24102 IX86_BUILTIN_VPCOMFALSEW,
24103 IX86_BUILTIN_VPCOMTRUEW,
24105 IX86_BUILTIN_VPCOMEQD,
24106 IX86_BUILTIN_VPCOMNED,
24107 IX86_BUILTIN_VPCOMLTD,
24108 IX86_BUILTIN_VPCOMLED,
24109 IX86_BUILTIN_VPCOMGTD,
24110 IX86_BUILTIN_VPCOMGED,
24111 IX86_BUILTIN_VPCOMFALSED,
24112 IX86_BUILTIN_VPCOMTRUED,
24114 IX86_BUILTIN_VPCOMEQQ,
24115 IX86_BUILTIN_VPCOMNEQ,
24116 IX86_BUILTIN_VPCOMLTQ,
24117 IX86_BUILTIN_VPCOMLEQ,
24118 IX86_BUILTIN_VPCOMGTQ,
24119 IX86_BUILTIN_VPCOMGEQ,
24120 IX86_BUILTIN_VPCOMFALSEQ,
24121 IX86_BUILTIN_VPCOMTRUEQ,
24123 /* LWP instructions. */
24124 IX86_BUILTIN_LLWPCB,
24125 IX86_BUILTIN_SLWPCB,
24126 IX86_BUILTIN_LWPVAL32,
24127 IX86_BUILTIN_LWPVAL64,
24128 IX86_BUILTIN_LWPINS32,
24129 IX86_BUILTIN_LWPINS64,
24131 IX86_BUILTIN_CLZS,
24133 /* BMI instructions. */
24134 IX86_BUILTIN_BEXTR32,
24135 IX86_BUILTIN_BEXTR64,
24136 IX86_BUILTIN_CTZS,
24138 /* TBM instructions. */
24139 IX86_BUILTIN_BEXTRI32,
24140 IX86_BUILTIN_BEXTRI64,
24143 /* FSGSBASE instructions. */
24144 IX86_BUILTIN_RDFSBASE32,
24145 IX86_BUILTIN_RDFSBASE64,
24146 IX86_BUILTIN_RDGSBASE32,
24147 IX86_BUILTIN_RDGSBASE64,
24148 IX86_BUILTIN_WRFSBASE32,
24149 IX86_BUILTIN_WRFSBASE64,
24150 IX86_BUILTIN_WRGSBASE32,
24151 IX86_BUILTIN_WRGSBASE64,
24153 /* RDRND instructions. */
24154 IX86_BUILTIN_RDRAND16_STEP,
24155 IX86_BUILTIN_RDRAND32_STEP,
24156 IX86_BUILTIN_RDRAND64_STEP,
24158 /* F16C instructions. */
24159 IX86_BUILTIN_CVTPH2PS,
24160 IX86_BUILTIN_CVTPH2PS256,
24161 IX86_BUILTIN_CVTPS2PH,
24162 IX86_BUILTIN_CVTPS2PH256,
24164 IX86_BUILTIN_MAX
24167 /* Table for the ix86 builtin decls. */
24168 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
24170 /* Table of all of the builtin functions that are possible with different ISAs
24171 but are waiting to be built until a function is declared to use that
24172 ISA. */
24173 struct builtin_isa {
24174 const char *name; /* function name */
24175 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
24176 int isa; /* isa_flags this builtin is defined for */
24177 bool const_p; /* true if the declaration is constant */
24178 bool set_and_not_built_p; /* true if the decl was deferred and not yet built */
24181 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
24184 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
24185 of which isa_flags to use in the ix86_builtins_isa array. Stores the
24186 function decl in the ix86_builtins array. Returns the function decl or
24187 NULL_TREE if the builtin was not added.
24189 If the front end has a special hook for builtin functions, delay adding
24190 builtin functions that aren't in the current ISA until the ISA is changed
24191 with function specific optimization. Doing so can save about 300K for the
24192 default compiler. When the builtin is expanded, check at that time whether
24193 it is valid.
24195 If the front end doesn't have a special hook, record all builtins, even
24196 those not in the current ISA, in case the user uses function specific
24197 options for a different ISA, so that we don't get scope errors if a
24198 builtin is added in the middle of a function scope. */
24200 static inline tree
24201 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
24202 enum ix86_builtins code)
24204 tree decl = NULL_TREE;
24206 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
24208 ix86_builtins_isa[(int) code].isa = mask;
24210 mask &= ~OPTION_MASK_ISA_64BIT;
24211 if (mask == 0
24212 || (mask & ix86_isa_flags) != 0
24213 || (lang_hooks.builtin_function
24214 == lang_hooks.builtin_function_ext_scope))
24217 tree type = ix86_get_builtin_func_type (tcode);
24218 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
24219 NULL, NULL_TREE);
24220 ix86_builtins[(int) code] = decl;
24221 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
24223 else
24225 ix86_builtins[(int) code] = NULL_TREE;
24226 ix86_builtins_isa[(int) code].tcode = tcode;
24227 ix86_builtins_isa[(int) code].name = name;
24228 ix86_builtins_isa[(int) code].const_p = false;
24229 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
24233 return decl;
24236 /* Like def_builtin, but also marks the function decl "const". */
24238 static inline tree
24239 def_builtin_const (int mask, const char *name,
24240 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
24242 tree decl = def_builtin (mask, name, tcode, code);
24243 if (decl)
24244 TREE_READONLY (decl) = 1;
24245 else
24246 ix86_builtins_isa[(int) code].const_p = true;
24248 return decl;
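/* Editorial sketch (not part of the original file): a typical registration
   through the helpers above.  The real calls are made by the builtin
   initialization routines later in this file, largely by looping over the
   bdesc_* tables below; the direct call here is hypothetical and only
   illustrates the argument order.  */

static void
example_register_addps (void)
{
  /* With -msse enabled the decl is created immediately; otherwise (when
     the front end lacks an extended-scope builtin hook) only the name,
     type code and ISA mask are recorded in ix86_builtins_isa, and
     ix86_add_new_builtins builds the decl later if the ISA is enabled by
     a function-specific option.  */
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                     V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);
}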
24251 /* Add any new builtin functions for a given ISA that may not have been
24252 declared. This saves a bit of space compared to adding all of the
24253 declarations to the tree, even those that are never used. */
24255 static void
24256 ix86_add_new_builtins (int isa)
24258 int i;
24260 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
24262 if ((ix86_builtins_isa[i].isa & isa) != 0
24263 && ix86_builtins_isa[i].set_and_not_built_p)
24265 tree decl, type;
24267 /* Don't define the builtin again. */
24268 ix86_builtins_isa[i].set_and_not_built_p = false;
24270 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
24271 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
24272 type, i, BUILT_IN_MD, NULL,
24273 NULL_TREE);
24275 ix86_builtins[i] = decl;
24276 if (ix86_builtins_isa[i].const_p)
24277 TREE_READONLY (decl) = 1;
24282 /* Bits for builtin_description.flag. */
24284 /* Set when we don't support the comparison natively, and should
24285 swap_comparison in order to support it. */
24286 #define BUILTIN_DESC_SWAP_OPERANDS 1
24288 struct builtin_description
24290 const unsigned int mask;
24291 const enum insn_code icode;
24292 const char *const name;
24293 const enum ix86_builtins code;
24294 const enum rtx_code comparison;
24295 const int flag;
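/* Editorial note (not part of the original file): the flag field is
   overloaded across the tables that follow -- some tables use it for
   BUILTIN_DESC_SWAP_OPERANDS bits, the pcmpestr/pcmpistr tables store an
   (int)-cast CC mode in it, and bdesc_special_args/bdesc_args store the
   (int)-cast ix86_builtin_func_type giving the builtin's prototype.  */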
24298 static const struct builtin_description bdesc_comi[] =
24300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
24301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
24302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
24303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
24304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
24305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
24306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
24307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
24308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
24309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
24310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
24311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
24312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
24313 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
24314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
24315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
24316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
24317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
24318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
24319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
24320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
24321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
24322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
24323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
24326 static const struct builtin_description bdesc_pcmpestr[] =
24328 /* SSE4.2 */
24329 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
24330 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
24331 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
24332 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
24333 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
24334 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
24335 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
24338 static const struct builtin_description bdesc_pcmpistr[] =
24340 /* SSE4.2 */
24341 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
24342 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
24343 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
24344 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
24345 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
24346 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
24347 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
24350 /* Special builtins with variable number of arguments. */
24351 static const struct builtin_description bdesc_special_args[] =
24353 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
24354 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
24356 /* MMX */
24357 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24359 /* 3DNow! */
24360 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24362 /* SSE */
24363 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24364 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24365 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24367 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24368 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24369 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24370 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24372 /* SSE or 3DNow!A */
24373 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24374 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
24376 /* SSE2 */
24377 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24378 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24380 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
24381 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24382 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
24383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
24384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
24385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24390 /* SSE3 */
24391 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24393 /* SSE4.1 */
24394 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
24396 /* SSE4A */
24397 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24398 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24400 /* AVX */
24401 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
24402 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
24404 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24405 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24406 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24407 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
24408 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
24410 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24411 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24412 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24413 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24414 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24415 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
24416 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24418 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
24419 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24420 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24422 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
24423 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
24424 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
24425 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
24426 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
24427 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
24428 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
24429 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
24431 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
24432 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
24433 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
24434 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
24435 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
24436 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
24438 /* FSGSBASE */
24439 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
24440 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
24441 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
24442 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
24443 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
24444 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
24445 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
24446 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
24449 /* Builtins with variable number of arguments. */
24450 static const struct builtin_description bdesc_args[] =
24452 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
24453 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
24454 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
24455 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24456 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24457 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24458 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24460 /* MMX */
24461 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24462 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24463 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24464 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24465 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24466 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24468 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24469 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24470 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24471 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24472 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24473 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24474 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24475 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24477 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24478 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24480 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24481 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24482 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24483 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24485 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24486 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24487 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24488 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24489 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24490 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24492 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24493 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24494 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24495 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24496 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
24497 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
24499 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24500 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
24501 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24503 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
24505 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24506 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24507 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24508 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24509 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24510 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24512 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24513 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24514 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24515 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24516 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24517 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24519 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24520 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24521 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24522 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24524 /* 3DNow! */
24525 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24526 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24527 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24528 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24530 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24531 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24532 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24533 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24534 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24535 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24536 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24537 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24538 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24539 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24540 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24541 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24542 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24543 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24544 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24546 /* 3DNow!A */
24547 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24548 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24549 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
24550 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24551 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24552 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
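  /* Illustrative sketch (not verbatim): <mm3dnow.h> wraps these 3DNow!
     builtins one-to-one, e.g.

       _m_pfadd (__m64 __A, __m64 __B)
         => (__m64) __builtin_ia32_pfadd ((__v2sf) __A, (__v2sf) __B);

     which is why the packed-float entries above all record the
     V2SF_FTYPE_V2SF_V2SF prototype.  */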
24554 /* SSE */
24555 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
24556 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24557 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24558 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24559 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24560 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24561 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24562 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24563 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24564 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24565 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24566 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24568 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24570 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24571 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24572 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24573 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24574 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24575 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24576 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24577 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24579 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24580 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24581 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24582 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24583 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24584 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24585 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24586 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24587 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24588 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24599 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24590 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24591 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24592 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24593 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24594 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24595 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24596 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24597 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24598 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24599 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24600 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
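  /* The comparison entries above synthesize predicates the hardware lacks:
     cmpgtps, for example, reuses the LT comparison with a ..._SWAP prototype,
     meaning the two source operands are exchanged before the maskcmp pattern
     is emitted (GT x,y is LT y,x), and the UNGE/UNGT codes provide the
     "not less than" / "not less or equal" forms.  */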
24602 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24603 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24604 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24605 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24607 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24608 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24609 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24610 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24612 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24614 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24615 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24616 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24617 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24618 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24620 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
24621 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
24622 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
24624 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
24626 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24627 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24628 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
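  /* Illustrative sketch (not verbatim): the user-visible SSE intrinsics in
     <xmmintrin.h> are thin wrappers around the builtins recorded above, e.g.

       extern __inline __m128
       _mm_add_ps (__m128 __A, __m128 __B)
       {
         return (__m128) __builtin_ia32_addps ((__v4sf) __A, (__v4sf) __B);
       }

     matching the V4SF_FTYPE_V4SF_V4SF prototype stored for addps.  */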
24630 /* SSE MMX or 3DNow!A */
24631 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24632 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24633 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24635 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24636 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24637 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24638 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24640 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
24641 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
24643 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
24645 /* SSE2 */
24646 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24648 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
24649 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
24650 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
24651 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
24652 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
24653 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24654 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
24655 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
24656 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
24657 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
24658 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
24659 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
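  /* CODE_FOR_nothing above means no single named insn pattern backs the
     builtin: the __builtin_ia32_vec_perm_* entries only record the prototype,
     and the builtins are expanded by the dedicated variable-permutation code
     (which may emit several instructions) instead of the generic
     one-insn-per-builtin path used for the rest of this table.  */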
24661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
24662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
24663 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
24664 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
24665 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24666 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24668 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24669 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24670 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
24671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24674 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
24676 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24677 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24678 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24679 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24681 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24682 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
24683 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24685 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24686 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24687 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24688 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24689 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24690 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24691 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24692 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24694 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24696 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24697 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24700 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24701 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24702 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24704 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24705 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24707 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24708 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24709 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24710 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24711 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24712 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24713 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24715 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24716 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24717 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24718 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24720 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24721 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24722 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24723 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24725 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24727 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24728 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24729 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24731 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
24733 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24734 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24735 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24736 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24737 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24738 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24739 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24740 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24742 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24743 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24744 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24745 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24746 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24747 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24748 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24749 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24751 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24752 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24754 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24756 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24757 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24759 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24760 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24762 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24763 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24764 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24765 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24766 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24767 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24769 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24770 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24771 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24772 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24774 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24775 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24776 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24777 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24778 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24779 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24780 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24781 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24783 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
24784 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
24785 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
24787 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24788 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
24790 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
24791 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
24793 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
24795 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
24796 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
24797 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
24798 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
24800 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
24801 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24802 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24803 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
24804 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24805 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24806 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
24808 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
24809 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24810 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24811 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
24812 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24813 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24814 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
24816 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24817 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24818 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24819 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24821 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
24822 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
24823 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
24825 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
24827 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
24828 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
24830 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
24832 /* SSE2 MMX */
24833 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
24834 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
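  /* Illustrative sketch (not verbatim): <emmintrin.h> exposes the SSE2
     integer builtins above in the same one-to-one style, e.g.

       extern __inline __m128i
       _mm_add_epi32 (__m128i __A, __m128i __B)
       {
         return (__m128i) __builtin_ia32_paddd128 ((__v4si) __A, (__v4si) __B);
       }

     matching the V4SI_FTYPE_V4SI_V4SI prototype stored for paddd128.  */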
24836 /* SSE3 */
24837 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24838 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24840 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24841 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24842 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24843 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24844 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24845 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24847 /* SSSE3 */
24848 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
24849 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
24850 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
24851 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
24852 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
24853 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
24855 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24856 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24857 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24858 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24859 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24860 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24861 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24862 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24863 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24864 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24865 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24866 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24867 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
24868 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
24869 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24870 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24871 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24872 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24873 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24874 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24875 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24876 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24877 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24878 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24880 /* SSSE3. */
24881 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
24882 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
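  /* The ..._INT_CONVERT prototypes mark builtins whose declared modes differ
     from the insn pattern's modes: palignr128, like pslldqi128/psrldqi128
     earlier, is declared on V2DI but expands to a TImode pattern, so the
     operands go through lowpart conversions first.  The count operand of
     these builtins is a bit count; illustratively, <tmmintrin.h> multiplies
     the intrinsic's byte offset by 8, roughly

       _mm_alignr_epi8 (__m128i __X, __m128i __Y, const int __N)
         => (__m128i) __builtin_ia32_palignr128 ((__v2di) __X, (__v2di) __Y, __N * 8);  */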
24884 /* SSE4.1 */
24885 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24886 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24887 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
24888 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
24889 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24890 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24891 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24892 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
24893 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24894 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
24896 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
24897 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
24898 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
24899 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
24900 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
24901 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
24902 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
24903 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
24904 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
24905 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
24906 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
24907 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
24908 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
24910 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
24911 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24912 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24913 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24914 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24915 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24916 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24917 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24918 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24919 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24920 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
24921 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24923 /* SSE4.1 */
24924 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
24925 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
24926 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24927 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24929 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
24930 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
24931 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
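  /* All three ptest builtins above expand the same sse4_1_ptest pattern; the
     comparison field selects which flag the integer result is derived from:
     EQ reads ZF (ptestz), LTU reads CF (ptestc), and GTU gives the
     "neither ZF nor CF" form (ptestnzc).  The ..._PTEST prototype tells the
     expander to materialize an int from the flags instead of a vector.  */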
24933 /* SSE4.2 */
24934 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24935 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
24936 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
24937 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
24938 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
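  /* Illustrative sketch (not verbatim): the CRC32 builtins surface in
     <smmintrin.h> as

       extern __inline unsigned int
       _mm_crc32_u32 (unsigned int __C, unsigned int __V)
       {
         return __builtin_ia32_crc32si (__C, __V);
       }

     hence the UINT_FTYPE_UINT_UINT prototype recorded above.  */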
24940 /* SSE4A */
24941 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
24942 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
24943 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
24944 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24946 /* AES */
24947 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
24948 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
24950 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24951 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24952 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24953 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24955 /* PCLMUL */
24956 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
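  /* Entries with a 0 in the name slot (the FABSQ/COPYSIGNQ pair earlier and
     the AES/PCLMUL entries above) only supply expansion data; the loop that
     registers builtins from this table skips unnamed entries, and their
     user-visible names are added separately so that, for instance, the AES
     builtins can be guarded by the AES option mask rather than the plain
     SSE2 mask recorded here.  */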
24958 /* AVX */
24959 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24960 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24961 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24962 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24963 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24964 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24965 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24966 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24967 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24968 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24969 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24970 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24971 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24972 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24973 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24974 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24975 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24976 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24977 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24978 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24979 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24980 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24981 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24982 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24983 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24984 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24986 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
24987 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
24988 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
24989 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
24991 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24992 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24993 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
24994 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
24995 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24996 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24997 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
25005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
25006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
25007 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
25008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
25009 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
25010 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
25011 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
25012 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
25013 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
25014 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
25015 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25016 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25017 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
25018 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
25019 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
25020 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
25021 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
25022 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
25023 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
25024 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
25026 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25027 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25028 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
25030 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
25031 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25032 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25033 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25034 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25036 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25038 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
25039 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
25041 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25042 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25043 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25044 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25046 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
25047 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
25048 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
25049 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
25050 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
25051 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
25053 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25054 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25055 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25056 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25057 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25058 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25059 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25060 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25061 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25062 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25063 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25064 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25065 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25066 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
25070 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
25072 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25073 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25075 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
25077 /* BMI */
25078 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25079 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25080 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
25082 /* TBM */
25083 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25084 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25086 /* F16C */
25087 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
25088 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
25089 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
25090 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
25093 /* FMA4 and XOP. */
25094 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
25095 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
25096 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
25097 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
25098 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
25099 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
25100 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
25101 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
25102 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
25103 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
25104 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
25105 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
25106 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
25107 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
25108 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
25109 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
25110 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
25111 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
25112 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
25113 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
25114 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
25115 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
25116 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
25117 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
25118 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
25119 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
25120 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
25121 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
25122 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
25123 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
25124 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
25125 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
25126 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
25127 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
25128 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
25129 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
25130 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
25131 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
25132 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
25133 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
25134 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
25135 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
25136 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
25137 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
25138 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
25139 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
25140 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
25141 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
25142 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
25143 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
25144 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
25145 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
25147 static const struct builtin_description bdesc_multi_arg[] =
25149 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
25150 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
25151 UNKNOWN, (int)MULTI_ARG_3_SF },
25152 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
25153 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
25154 UNKNOWN, (int)MULTI_ARG_3_DF },
25156 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
25157 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
25158 UNKNOWN, (int)MULTI_ARG_3_SF },
25159 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
25160 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
25161 UNKNOWN, (int)MULTI_ARG_3_DF },
25162 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
25163 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
25164 UNKNOWN, (int)MULTI_ARG_3_SF2 },
25165 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
25166 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
25167 UNKNOWN, (int)MULTI_ARG_3_DF2 },
25169 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
25170 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
25171 UNKNOWN, (int)MULTI_ARG_3_SF },
25172 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
25173 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
25174 UNKNOWN, (int)MULTI_ARG_3_DF },
25175 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
25176 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
25177 UNKNOWN, (int)MULTI_ARG_3_SF2 },
25178 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
25179 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
25180 UNKNOWN, (int)MULTI_ARG_3_DF2 },
25182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
25183 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
25184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
25185 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
25186 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
25187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
25188 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
25190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25191 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25192 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
25193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
25194 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
25195 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
25196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
25198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
25200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25202 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25203 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25204 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25211 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25214 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
25215 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
25216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
25217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
25218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
25219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
25220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
25221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
25223 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
25224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
25225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
25227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
25228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
25230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
25231 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
25232 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
25233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
25234 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
25235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
25237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
25239 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
25240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25241 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
25242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25243 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25244 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
25245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
25246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
25248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25250 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25251 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
25254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25255 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25256 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
25257 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
25258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
25259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
25261 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
25262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25263 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25264 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
25265 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
25266 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
25267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
25269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
25270 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25272 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
25273 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
25274 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
25275 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
25277 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25278 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25279 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25280 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
25281 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
25282 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
25283 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
25285 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
25286 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25287 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25288 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
25289 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
25290 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
25291 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
25293 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
25294 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25295 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25296 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
25297 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
25298 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
25299 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
25301 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
25302 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25303 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25304 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
25305 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
25306 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
25307 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
25309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
25313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
25314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
25315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
25317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25319 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25321 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25328 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25330 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
25336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
25337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
25342 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
25343    in the current target ISA, so that the user can compile particular modules
25344    with target-specific options that differ from the command-line
25345    options.  */
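/* Illustration: because these builtins are registered even when the current
   ISA does not enable them, a translation unit built with -mavx can use them
   while the rest of the program is compiled without AVX.  A minimal sketch,
   assuming such a unit; the typedef and function name are only for the
   example.  */

typedef double v4df_example __attribute__ ((vector_size (32)));

static v4df_example
example_sqrt_v4df (v4df_example x)
{
  /* Uses IX86_BUILTIN_SQRTPD256, registered in the tables above; if AVX is
     not enabled for this function, GCC diagnoses the call when expanding
     the builtin.  */
  return __builtin_ia32_sqrtpd256 (x);
}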
25346 static void
25347 ix86_init_mmx_sse_builtins (void)
25349 const struct builtin_description * d;
25350 enum ix86_builtin_func_type ftype;
25351 size_t i;
25353 /* Add all special builtins with variable number of operands. */
25354 for (i = 0, d = bdesc_special_args;
25355 i < ARRAY_SIZE (bdesc_special_args);
25356 i++, d++)
25358 if (d->name == 0)
25359 continue;
25361 ftype = (enum ix86_builtin_func_type) d->flag;
25362 def_builtin (d->mask, d->name, ftype, d->code);
25365 /* Add all builtins with variable number of operands. */
25366 for (i = 0, d = bdesc_args;
25367 i < ARRAY_SIZE (bdesc_args);
25368 i++, d++)
25370 if (d->name == 0)
25371 continue;
25373 ftype = (enum ix86_builtin_func_type) d->flag;
25374 def_builtin_const (d->mask, d->name, ftype, d->code);
25377 /* pcmpestr[im] insns. */
25378 for (i = 0, d = bdesc_pcmpestr;
25379 i < ARRAY_SIZE (bdesc_pcmpestr);
25380 i++, d++)
25382 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25383 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25384 else
25385 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25386 def_builtin_const (d->mask, d->name, ftype, d->code);
25389 /* pcmpistr[im] insns. */
25390 for (i = 0, d = bdesc_pcmpistr;
25391 i < ARRAY_SIZE (bdesc_pcmpistr);
25392 i++, d++)
25394 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25395 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25396 else
25397 ftype = INT_FTYPE_V16QI_V16QI_INT;
25398 def_builtin_const (d->mask, d->name, ftype, d->code);
25401 /* comi/ucomi insns. */
25402 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25404 if (d->mask == OPTION_MASK_ISA_SSE2)
25405 ftype = INT_FTYPE_V2DF_V2DF;
25406 else
25407 ftype = INT_FTYPE_V4SF_V4SF;
25408 def_builtin_const (d->mask, d->name, ftype, d->code);
25411 /* SSE */
25412 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25413 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25414 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25415 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
25417 /* SSE or 3DNow!A */
25418 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25419 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25420 IX86_BUILTIN_MASKMOVQ);
25422 /* SSE2 */
25423 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25424 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25426 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25427 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25428 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25429 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25431 /* SSE3. */
25432 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25433 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25434 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25435 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
25437 /* AES */
25438 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25439 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25440 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25441 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25442 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25443 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25444 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25445 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25446 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25447 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25448 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25449 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25451 /* PCLMUL */
25452 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25453 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25455 /* RDRND */
25456 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25457 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25458 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25459 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25460 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25461 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25462 IX86_BUILTIN_RDRAND64_STEP);
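      /* Illustration of the *_step builtins just registered: each returns
	 nonzero on success and stores the random value through its pointer
	 argument, so callers normally retry.  A minimal sketch, assuming a
	 unit compiled with -mrdrnd (the function name is only for the
	 example):

	   static unsigned int
	   example_rdrand_u32 (void)
	   {
	     unsigned int value;
	     while (!__builtin_ia32_rdrand32_step (&value))
	       ;
	     return value;
	   }
	 */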
25464 /* MMX access to the vec_init patterns. */
25465 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25466 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25468 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25469 V4HI_FTYPE_HI_HI_HI_HI,
25470 IX86_BUILTIN_VEC_INIT_V4HI);
25472 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25473 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25474 IX86_BUILTIN_VEC_INIT_V8QI);
25476 /* Access to the vec_extract patterns. */
25477 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25478 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25479 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25480 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25481 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25482 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25483 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25484 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25485 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25486 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25488 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25489 "__builtin_ia32_vec_ext_v4hi",
25490 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25492 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25493 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25495 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25496 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
25498 /* Access to the vec_set patterns. */
25499 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25500 "__builtin_ia32_vec_set_v2di",
25501 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25503 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25504 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25506 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25507 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25509 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25510 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25512 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25513 "__builtin_ia32_vec_set_v4hi",
25514 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25516 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25517 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
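      /* Illustration of the vec_ext/vec_set builtins just registered; they
	 are normally reached through the intrinsics headers but can also be
	 called directly.  A minimal sketch, assuming SSE4.1 is enabled for
	 the unit (the typedef and names are only for the example):

	   typedef float v4sf_example __attribute__ ((vector_size (16)));

	   static float
	   example_swap_lane0 (v4sf_example *v, float f)
	   {
	     float old = __builtin_ia32_vec_ext_v4sf (*v, 0);
	     *v = __builtin_ia32_vec_set_v4sf (*v, f, 0);
	     return old;
	   }

	 The lane index is expected to be a compile-time constant.  */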
25519 /* Add FMA4 and XOP multi-arg builtins.  */
25520 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25522 if (d->name == 0)
25523 continue;
25525 ftype = (enum ix86_builtin_func_type) d->flag;
25526 def_builtin_const (d->mask, d->name, ftype, d->code);
25530 /* Internal method for ix86_init_builtins. */
25532 static void
25533 ix86_init_builtins_va_builtins_abi (void)
25535 tree ms_va_ref, sysv_va_ref;
25536 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25537 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25538 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25539 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25541 if (!TARGET_64BIT)
25542 return;
25543 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25544 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25545 ms_va_ref = build_reference_type (ms_va_list_type_node);
25546 sysv_va_ref =
25547 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25549 fnvoid_va_end_ms =
25550 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25551 fnvoid_va_start_ms =
25552 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25553 fnvoid_va_end_sysv =
25554 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25555 fnvoid_va_start_sysv =
25556 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25557 NULL_TREE);
25558 fnvoid_va_copy_ms =
25559 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25560 NULL_TREE);
25561 fnvoid_va_copy_sysv =
25562 build_function_type_list (void_type_node, sysv_va_ref,
25563 sysv_va_ref, NULL_TREE);
25565 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25566 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25567 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25568 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25569 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25570 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25571 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25572 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25573 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25574 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25575 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25576 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
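/* Illustration of the __builtin_ms_va_* interface set up above; it is used
   by 64-bit code that has to implement or call variadic functions in the
   other calling convention.  A minimal sketch, assuming a 64-bit target
   where the __builtin_ms_va_list type is exposed; the function name is only
   for the example.  */

__attribute__ ((ms_abi)) static void
example_ms_varargs (const char *fmt, ...)
{
  __builtin_ms_va_list ap, ap_copy;

  __builtin_ms_va_start (ap, fmt);
  __builtin_ms_va_copy (ap_copy, ap);
  /* ... walk the arguments through ap_copy ... */
  __builtin_ms_va_end (ap_copy);
  __builtin_ms_va_end (ap);
}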
25579 static void
25580 ix86_init_builtin_types (void)
25582 tree float128_type_node, float80_type_node;
25584 /* The __float80 type. */
25585 float80_type_node = long_double_type_node;
25586 if (TYPE_MODE (float80_type_node) != XFmode)
25588 /* long double is not in XFmode here, so build __float80 as a distinct 80-bit REAL_TYPE.  */
25589 float80_type_node = make_node (REAL_TYPE);
25591 TYPE_PRECISION (float80_type_node) = 80;
25592 layout_type (float80_type_node);
25594 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25596 /* The __float128 type. */
25597 float128_type_node = make_node (REAL_TYPE);
25598 TYPE_PRECISION (float128_type_node) = 128;
25599 layout_type (float128_type_node);
25600 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25602 /* This macro is built by i386-builtin-types.awk. */
25603 DEFINE_BUILTIN_PRIMITIVE_TYPES;
25606 static void
25607 ix86_init_builtins (void)
25609 tree t;
25611 ix86_init_builtin_types ();
25613 /* TFmode support builtins. */
25614 def_builtin_const (0, "__builtin_infq",
25615 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25616 def_builtin_const (0, "__builtin_huge_valq",
25617 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25619 /* We will expand them to normal calls if SSE2 isn't available, since
25620 they are used by libgcc.  */
25621 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25622 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25623 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25624 TREE_READONLY (t) = 1;
25625 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25627 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25628 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25629 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25630 TREE_READONLY (t) = 1;
25631 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
25633 ix86_init_mmx_sse_builtins ();
25635 if (TARGET_64BIT)
25636 ix86_init_builtins_va_builtins_abi ();
25638 #ifdef SUBTARGET_INIT_BUILTINS
25639 SUBTARGET_INIT_BUILTINS;
25640 #endif
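/* Illustration of the TFmode builtins defined above (__float128 itself is
   registered in ix86_init_builtin_types).  A minimal sketch; the function
   name is only for the example.  */

static __float128
example_abs_or_huge (__float128 x)
{
  /* __builtin_fabsq lowers to the __fabstf2 libcall when SSE2 is not
     available, per the comment above.  */
  __float128 a = __builtin_fabsq (x);
  return a == __builtin_infq () ? __builtin_huge_valq () : a;
}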
25643 /* Return the ix86 builtin for CODE. */
25645 static tree
25646 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25648 if (code >= IX86_BUILTIN_MAX)
25649 return error_mark_node;
25651 return ix86_builtins[code];
25654 /* Errors in the source file can cause expand_expr to return const0_rtx
25655 where we expect a vector. To avoid crashing, use one of the vector
25656 clear instructions. */
25657 static rtx
25658 safe_vector_operand (rtx x, enum machine_mode mode)
25660 if (x == const0_rtx)
25661 x = CONST0_RTX (mode);
25662 return x;
25665 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25667 static rtx
25668 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
25670 rtx pat;
25671 tree arg0 = CALL_EXPR_ARG (exp, 0);
25672 tree arg1 = CALL_EXPR_ARG (exp, 1);
25673 rtx op0 = expand_normal (arg0);
25674 rtx op1 = expand_normal (arg1);
25675 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25676 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25677 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25679 if (VECTOR_MODE_P (mode0))
25680 op0 = safe_vector_operand (op0, mode0);
25681 if (VECTOR_MODE_P (mode1))
25682 op1 = safe_vector_operand (op1, mode1);
25684 if (optimize || !target
25685 || GET_MODE (target) != tmode
25686 || !insn_data[icode].operand[0].predicate (target, tmode))
25687 target = gen_reg_rtx (tmode);
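  /* Some builtins pass a plain 32-bit integer where the insn pattern wants
     a TImode operand; widen it by loading the value into the low element of
     a V4SImode register and then viewing that register as TImode.  */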
25689 if (GET_MODE (op1) == SImode && mode1 == TImode)
25691 rtx x = gen_reg_rtx (V4SImode);
25692 emit_insn (gen_sse2_loadd (x, op1));
25693 op1 = gen_lowpart (TImode, x);
25696 if (!insn_data[icode].operand[1].predicate (op0, mode0))
25697 op0 = copy_to_mode_reg (mode0, op0);
25698 if (!insn_data[icode].operand[2].predicate (op1, mode1))
25699 op1 = copy_to_mode_reg (mode1, op1);
25701 pat = GEN_FCN (icode) (target, op0, op1);
25702 if (! pat)
25703 return 0;
25705 emit_insn (pat);
25707 return target;
25710 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
25712 static rtx
25713 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
25714 enum ix86_builtin_func_type m_type,
25715 enum rtx_code sub_code)
25717 rtx pat;
25718 int i;
25719 int nargs;
25720 bool comparison_p = false;
25721 bool tf_p = false;
25722 bool last_arg_constant = false;
25723 int num_memory = 0;
25724 struct {
25725 rtx op;
25726 enum machine_mode mode;
25727 } args[4];
25729 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25731 switch (m_type)
25733 case MULTI_ARG_4_DF2_DI_I:
25734 case MULTI_ARG_4_DF2_DI_I1:
25735 case MULTI_ARG_4_SF2_SI_I:
25736 case MULTI_ARG_4_SF2_SI_I1:
25737 nargs = 4;
25738 last_arg_constant = true;
25739 break;
25741 case MULTI_ARG_3_SF:
25742 case MULTI_ARG_3_DF:
25743 case MULTI_ARG_3_SF2:
25744 case MULTI_ARG_3_DF2:
25745 case MULTI_ARG_3_DI:
25746 case MULTI_ARG_3_SI:
25747 case MULTI_ARG_3_SI_DI:
25748 case MULTI_ARG_3_HI:
25749 case MULTI_ARG_3_HI_SI:
25750 case MULTI_ARG_3_QI:
25751 case MULTI_ARG_3_DI2:
25752 case MULTI_ARG_3_SI2:
25753 case MULTI_ARG_3_HI2:
25754 case MULTI_ARG_3_QI2:
25755 nargs = 3;
25756 break;
25758 case MULTI_ARG_2_SF:
25759 case MULTI_ARG_2_DF:
25760 case MULTI_ARG_2_DI:
25761 case MULTI_ARG_2_SI:
25762 case MULTI_ARG_2_HI:
25763 case MULTI_ARG_2_QI:
25764 nargs = 2;
25765 break;
25767 case MULTI_ARG_2_DI_IMM:
25768 case MULTI_ARG_2_SI_IMM:
25769 case MULTI_ARG_2_HI_IMM:
25770 case MULTI_ARG_2_QI_IMM:
25771 nargs = 2;
25772 last_arg_constant = true;
25773 break;
25775 case MULTI_ARG_1_SF:
25776 case MULTI_ARG_1_DF:
25777 case MULTI_ARG_1_SF2:
25778 case MULTI_ARG_1_DF2:
25779 case MULTI_ARG_1_DI:
25780 case MULTI_ARG_1_SI:
25781 case MULTI_ARG_1_HI:
25782 case MULTI_ARG_1_QI:
25783 case MULTI_ARG_1_SI_DI:
25784 case MULTI_ARG_1_HI_DI:
25785 case MULTI_ARG_1_HI_SI:
25786 case MULTI_ARG_1_QI_DI:
25787 case MULTI_ARG_1_QI_SI:
25788 case MULTI_ARG_1_QI_HI:
25789 nargs = 1;
25790 break;
25792 case MULTI_ARG_2_DI_CMP:
25793 case MULTI_ARG_2_SI_CMP:
25794 case MULTI_ARG_2_HI_CMP:
25795 case MULTI_ARG_2_QI_CMP:
25796 nargs = 2;
25797 comparison_p = true;
25798 break;
25800 case MULTI_ARG_2_SF_TF:
25801 case MULTI_ARG_2_DF_TF:
25802 case MULTI_ARG_2_DI_TF:
25803 case MULTI_ARG_2_SI_TF:
25804 case MULTI_ARG_2_HI_TF:
25805 case MULTI_ARG_2_QI_TF:
25806 nargs = 2;
25807 tf_p = true;
25808 break;
25810 default:
25811 gcc_unreachable ();
25814 if (optimize || !target
25815 || GET_MODE (target) != tmode
25816 || !insn_data[icode].operand[0].predicate (target, tmode))
25817 target = gen_reg_rtx (tmode);
25819 gcc_assert (nargs <= 4);
25821 for (i = 0; i < nargs; i++)
25823 tree arg = CALL_EXPR_ARG (exp, i);
25824 rtx op = expand_normal (arg);
25825 int adjust = (comparison_p) ? 1 : 0;
25826 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
25828 if (last_arg_constant && i == nargs-1)
25830 if (!CONST_INT_P (op))
25832 error ("last argument must be an immediate");
25833 return gen_reg_rtx (tmode);
25836 else
25838 if (VECTOR_MODE_P (mode))
25839 op = safe_vector_operand (op, mode);
25841 /* If we aren't optimizing, only allow one memory operand to be
25842 generated. */
25843 if (memory_operand (op, mode))
25844 num_memory++;
25846 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
25848 if (optimize
25849 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
25850 || num_memory > 1)
25851 op = force_reg (mode, op);
25854 args[i].op = op;
25855 args[i].mode = mode;
25858 switch (nargs)
25860 case 1:
25861 pat = GEN_FCN (icode) (target, args[0].op);
25862 break;
25864 case 2:
25865 if (tf_p)
25866 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
25867 GEN_INT ((int)sub_code));
25868 else if (! comparison_p)
25869 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25870 else
25872 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
25873 args[0].op,
25874 args[1].op);
25876 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
25878 break;
25880 case 3:
25881 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25882 break;
25884 case 4:
25885 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
25886 break;
25888 default:
25889 gcc_unreachable ();
25892 if (! pat)
25893 return 0;
25895 emit_insn (pat);
25896 return target;
25899 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
25900 insns with vec_merge. */
25902 static rtx
25903 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
25904 rtx target)
25906 rtx pat;
25907 tree arg0 = CALL_EXPR_ARG (exp, 0);
25908 rtx op1, op0 = expand_normal (arg0);
25909 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25910 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25912 if (optimize || !target
25913 || GET_MODE (target) != tmode
25914 || !insn_data[icode].operand[0].predicate (target, tmode))
25915 target = gen_reg_rtx (tmode);
25917 if (VECTOR_MODE_P (mode0))
25918 op0 = safe_vector_operand (op0, mode0);
25920 if ((optimize && !register_operand (op0, mode0))
25921 || !insn_data[icode].operand[1].predicate (op0, mode0))
25922 op0 = copy_to_mode_reg (mode0, op0);
25924 op1 = op0;
25925 if (!insn_data[icode].operand[2].predicate (op1, mode0))
25926 op1 = copy_to_mode_reg (mode0, op1);
25928 pat = GEN_FCN (icode) (target, op0, op1);
25929 if (! pat)
25930 return 0;
25931 emit_insn (pat);
25932 return target;
25935 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
25937 static rtx
25938 ix86_expand_sse_compare (const struct builtin_description *d,
25939 tree exp, rtx target, bool swap)
25941 rtx pat;
25942 tree arg0 = CALL_EXPR_ARG (exp, 0);
25943 tree arg1 = CALL_EXPR_ARG (exp, 1);
25944 rtx op0 = expand_normal (arg0);
25945 rtx op1 = expand_normal (arg1);
25946 rtx op2;
25947 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
25948 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
25949 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
25950 enum rtx_code comparison = d->comparison;
25952 if (VECTOR_MODE_P (mode0))
25953 op0 = safe_vector_operand (op0, mode0);
25954 if (VECTOR_MODE_P (mode1))
25955 op1 = safe_vector_operand (op1, mode1);
25957 /* Swap operands if we have a comparison that isn't available in
25958 hardware. */
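  /* For example, CMPPS/CMPPD encode LT and LE but not GT or GE, so a GT
     test is produced by swapping the operands and comparing with LT.  */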
25959 if (swap)
25961 rtx tmp = gen_reg_rtx (mode1);
25962 emit_move_insn (tmp, op1);
25963 op1 = op0;
25964 op0 = tmp;
25967 if (optimize || !target
25968 || GET_MODE (target) != tmode
25969 || !insn_data[d->icode].operand[0].predicate (target, tmode))
25970 target = gen_reg_rtx (tmode);
25972 if ((optimize && !register_operand (op0, mode0))
25973 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
25974 op0 = copy_to_mode_reg (mode0, op0);
25975 if ((optimize && !register_operand (op1, mode1))
25976 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
25977 op1 = copy_to_mode_reg (mode1, op1);
25979 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
25980 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
25981 if (! pat)
25982 return 0;
25983 emit_insn (pat);
25984 return target;
25987 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
25989 static rtx
25990 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
25991 rtx target)
25993 rtx pat;
25994 tree arg0 = CALL_EXPR_ARG (exp, 0);
25995 tree arg1 = CALL_EXPR_ARG (exp, 1);
25996 rtx op0 = expand_normal (arg0);
25997 rtx op1 = expand_normal (arg1);
25998 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25999 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26000 enum rtx_code comparison = d->comparison;
26002 if (VECTOR_MODE_P (mode0))
26003 op0 = safe_vector_operand (op0, mode0);
26004 if (VECTOR_MODE_P (mode1))
26005 op1 = safe_vector_operand (op1, mode1);
26007 /* Swap operands if we have a comparison that isn't available in
26008 hardware. */
26009 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
26011 rtx tmp = op1;
26012 op1 = op0;
26013 op0 = tmp;
26016 target = gen_reg_rtx (SImode);
26017 emit_move_insn (target, const0_rtx);
26018 target = gen_rtx_SUBREG (QImode, target, 0);
26020 if ((optimize && !register_operand (op0, mode0))
26021 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26022 op0 = copy_to_mode_reg (mode0, op0);
26023 if ((optimize && !register_operand (op1, mode1))
26024 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26025 op1 = copy_to_mode_reg (mode1, op1);
26027 pat = GEN_FCN (d->icode) (op0, op1);
26028 if (! pat)
26029 return 0;
26030 emit_insn (pat);
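  /* The comi pattern just emitted sets the flags register (its SET_DEST);
     store the requested flag test into the low byte of the zeroed SImode
     temporary so that SUBREG_REG (target) reads back as a clean 0 or 1.  */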
26031 emit_insn (gen_rtx_SET (VOIDmode,
26032 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26033 gen_rtx_fmt_ee (comparison, QImode,
26034 SET_DEST (pat),
26035 const0_rtx)));
26037 return SUBREG_REG (target);
26040 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
26042 static rtx
26043 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
26044 rtx target)
26046 rtx pat;
26047 tree arg0 = CALL_EXPR_ARG (exp, 0);
26048 tree arg1 = CALL_EXPR_ARG (exp, 1);
26049 rtx op0 = expand_normal (arg0);
26050 rtx op1 = expand_normal (arg1);
26051 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26052 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26053 enum rtx_code comparison = d->comparison;
26055 if (VECTOR_MODE_P (mode0))
26056 op0 = safe_vector_operand (op0, mode0);
26057 if (VECTOR_MODE_P (mode1))
26058 op1 = safe_vector_operand (op1, mode1);
26060 target = gen_reg_rtx (SImode);
26061 emit_move_insn (target, const0_rtx);
26062 target = gen_rtx_SUBREG (QImode, target, 0);
26064 if ((optimize && !register_operand (op0, mode0))
26065 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26066 op0 = copy_to_mode_reg (mode0, op0);
26067 if ((optimize && !register_operand (op1, mode1))
26068 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26069 op1 = copy_to_mode_reg (mode1, op1);
26071 pat = GEN_FCN (d->icode) (op0, op1);
26072 if (! pat)
26073 return 0;
26074 emit_insn (pat);
26075 emit_insn (gen_rtx_SET (VOIDmode,
26076 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26077 gen_rtx_fmt_ee (comparison, QImode,
26078 SET_DEST (pat),
26079 const0_rtx)));
26081 return SUBREG_REG (target);
26084 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
26086 static rtx
26087 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
26088 tree exp, rtx target)
26090 rtx pat;
26091 tree arg0 = CALL_EXPR_ARG (exp, 0);
26092 tree arg1 = CALL_EXPR_ARG (exp, 1);
26093 tree arg2 = CALL_EXPR_ARG (exp, 2);
26094 tree arg3 = CALL_EXPR_ARG (exp, 3);
26095 tree arg4 = CALL_EXPR_ARG (exp, 4);
26096 rtx scratch0, scratch1;
26097 rtx op0 = expand_normal (arg0);
26098 rtx op1 = expand_normal (arg1);
26099 rtx op2 = expand_normal (arg2);
26100 rtx op3 = expand_normal (arg3);
26101 rtx op4 = expand_normal (arg4);
26102 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
26104 tmode0 = insn_data[d->icode].operand[0].mode;
26105 tmode1 = insn_data[d->icode].operand[1].mode;
26106 modev2 = insn_data[d->icode].operand[2].mode;
26107 modei3 = insn_data[d->icode].operand[3].mode;
26108 modev4 = insn_data[d->icode].operand[4].mode;
26109 modei5 = insn_data[d->icode].operand[5].mode;
26110 modeimm = insn_data[d->icode].operand[6].mode;
26112 if (VECTOR_MODE_P (modev2))
26113 op0 = safe_vector_operand (op0, modev2);
26114 if (VECTOR_MODE_P (modev4))
26115 op2 = safe_vector_operand (op2, modev4);
26117 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26118 op0 = copy_to_mode_reg (modev2, op0);
26119 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
26120 op1 = copy_to_mode_reg (modei3, op1);
26121 if ((optimize && !register_operand (op2, modev4))
26122 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
26123 op2 = copy_to_mode_reg (modev4, op2);
26124 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
26125 op3 = copy_to_mode_reg (modei5, op3);
26127 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
26129 error ("the fifth argument must be an 8-bit immediate");
26130 return const0_rtx;
26133 if (d->code == IX86_BUILTIN_PCMPESTRI128)
26135 if (optimize || !target
26136 || GET_MODE (target) != tmode0
26137 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26138 target = gen_reg_rtx (tmode0);
26140 scratch1 = gen_reg_rtx (tmode1);
26142 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
26144 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
26146 if (optimize || !target
26147 || GET_MODE (target) != tmode1
26148 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26149 target = gen_reg_rtx (tmode1);
26151 scratch0 = gen_reg_rtx (tmode0);
26153 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
26155 else
26157 gcc_assert (d->flag);
26159 scratch0 = gen_reg_rtx (tmode0);
26160 scratch1 = gen_reg_rtx (tmode1);
26162 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
26165 if (! pat)
26166 return 0;
26168 emit_insn (pat);
26170 if (d->flag)
26172 target = gen_reg_rtx (SImode);
26173 emit_move_insn (target, const0_rtx);
26174 target = gen_rtx_SUBREG (QImode, target, 0);
26176 emit_insn
26177 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26178 gen_rtx_fmt_ee (EQ, QImode,
26179 gen_rtx_REG ((enum machine_mode) d->flag,
26180 FLAGS_REG),
26181 const0_rtx)));
26182 return SUBREG_REG (target);
26184 else
26185 return target;
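/* Usage sketch: the SSE4.2 wrappers in <smmintrin.h>, e.g.

     idx = _mm_cmpestri (a, la, b, lb, imm8);

   are defined in terms of __builtin_ia32_pcmpestri128 and reach this
   expander; the control byte imm8 is operand 6 of the insn above, which
   is why it must be a compile-time 8-bit immediate.  */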
26189 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26191 static rtx
26192 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
26193 tree exp, rtx target)
26195 rtx pat;
26196 tree arg0 = CALL_EXPR_ARG (exp, 0);
26197 tree arg1 = CALL_EXPR_ARG (exp, 1);
26198 tree arg2 = CALL_EXPR_ARG (exp, 2);
26199 rtx scratch0, scratch1;
26200 rtx op0 = expand_normal (arg0);
26201 rtx op1 = expand_normal (arg1);
26202 rtx op2 = expand_normal (arg2);
26203 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26205 tmode0 = insn_data[d->icode].operand[0].mode;
26206 tmode1 = insn_data[d->icode].operand[1].mode;
26207 modev2 = insn_data[d->icode].operand[2].mode;
26208 modev3 = insn_data[d->icode].operand[3].mode;
26209 modeimm = insn_data[d->icode].operand[4].mode;
26211 if (VECTOR_MODE_P (modev2))
26212 op0 = safe_vector_operand (op0, modev2);
26213 if (VECTOR_MODE_P (modev3))
26214 op1 = safe_vector_operand (op1, modev3);
26216 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26217 op0 = copy_to_mode_reg (modev2, op0);
26218 if ((optimize && !register_operand (op1, modev3))
26219 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26220 op1 = copy_to_mode_reg (modev3, op1);
26222 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26224 error ("the third argument must be an 8-bit immediate");
26225 return const0_rtx;
26228 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26230 if (optimize || !target
26231 || GET_MODE (target) != tmode0
26232 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26233 target = gen_reg_rtx (tmode0);
26235 scratch1 = gen_reg_rtx (tmode1);
26237 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26239 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26241 if (optimize || !target
26242 || GET_MODE (target) != tmode1
26243 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26244 target = gen_reg_rtx (tmode1);
26246 scratch0 = gen_reg_rtx (tmode0);
26248 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26250 else
26252 gcc_assert (d->flag);
26254 scratch0 = gen_reg_rtx (tmode0);
26255 scratch1 = gen_reg_rtx (tmode1);
26257 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26260 if (! pat)
26261 return 0;
26263 emit_insn (pat);
26265 if (d->flag)
26267 target = gen_reg_rtx (SImode);
26268 emit_move_insn (target, const0_rtx);
26269 target = gen_rtx_SUBREG (QImode, target, 0);
26271 emit_insn
26272 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26273 gen_rtx_fmt_ee (EQ, QImode,
26274 gen_rtx_REG ((enum machine_mode) d->flag,
26275 FLAGS_REG),
26276 const0_rtx)));
26277 return SUBREG_REG (target);
26279 else
26280 return target;
26283 /* Subroutine of ix86_expand_builtin to take care of insns with
26284 variable number of operands. */
26286 static rtx
26287 ix86_expand_args_builtin (const struct builtin_description *d,
26288 tree exp, rtx target)
26290 rtx pat, real_target;
26291 unsigned int i, nargs;
26292 unsigned int nargs_constant = 0;
26293 int num_memory = 0;
26294 struct
26296 rtx op;
26297 enum machine_mode mode;
26298 } args[4];
26299 bool last_arg_count = false;
26300 enum insn_code icode = d->icode;
26301 const struct insn_data_d *insn_p = &insn_data[icode];
26302 enum machine_mode tmode = insn_p->operand[0].mode;
26303 enum machine_mode rmode = VOIDmode;
26304 bool swap = false;
26305 enum rtx_code comparison = d->comparison;
26307 switch ((enum ix86_builtin_func_type) d->flag)
26309 case INT_FTYPE_V8SF_V8SF_PTEST:
26310 case INT_FTYPE_V4DI_V4DI_PTEST:
26311 case INT_FTYPE_V4DF_V4DF_PTEST:
26312 case INT_FTYPE_V4SF_V4SF_PTEST:
26313 case INT_FTYPE_V2DI_V2DI_PTEST:
26314 case INT_FTYPE_V2DF_V2DF_PTEST:
26315 return ix86_expand_sse_ptest (d, exp, target);
26316 case FLOAT128_FTYPE_FLOAT128:
26317 case FLOAT_FTYPE_FLOAT:
26318 case INT_FTYPE_INT:
26319 case UINT64_FTYPE_INT:
26320 case UINT16_FTYPE_UINT16:
26321 case INT64_FTYPE_INT64:
26322 case INT64_FTYPE_V4SF:
26323 case INT64_FTYPE_V2DF:
26324 case INT_FTYPE_V16QI:
26325 case INT_FTYPE_V8QI:
26326 case INT_FTYPE_V8SF:
26327 case INT_FTYPE_V4DF:
26328 case INT_FTYPE_V4SF:
26329 case INT_FTYPE_V2DF:
26330 case V16QI_FTYPE_V16QI:
26331 case V8SI_FTYPE_V8SF:
26332 case V8SI_FTYPE_V4SI:
26333 case V8HI_FTYPE_V8HI:
26334 case V8HI_FTYPE_V16QI:
26335 case V8QI_FTYPE_V8QI:
26336 case V8SF_FTYPE_V8SF:
26337 case V8SF_FTYPE_V8SI:
26338 case V8SF_FTYPE_V4SF:
26339 case V8SF_FTYPE_V8HI:
26340 case V4SI_FTYPE_V4SI:
26341 case V4SI_FTYPE_V16QI:
26342 case V4SI_FTYPE_V4SF:
26343 case V4SI_FTYPE_V8SI:
26344 case V4SI_FTYPE_V8HI:
26345 case V4SI_FTYPE_V4DF:
26346 case V4SI_FTYPE_V2DF:
26347 case V4HI_FTYPE_V4HI:
26348 case V4DF_FTYPE_V4DF:
26349 case V4DF_FTYPE_V4SI:
26350 case V4DF_FTYPE_V4SF:
26351 case V4DF_FTYPE_V2DF:
26352 case V4SF_FTYPE_V4SF:
26353 case V4SF_FTYPE_V4SI:
26354 case V4SF_FTYPE_V8SF:
26355 case V4SF_FTYPE_V4DF:
26356 case V4SF_FTYPE_V8HI:
26357 case V4SF_FTYPE_V2DF:
26358 case V2DI_FTYPE_V2DI:
26359 case V2DI_FTYPE_V16QI:
26360 case V2DI_FTYPE_V8HI:
26361 case V2DI_FTYPE_V4SI:
26362 case V2DF_FTYPE_V2DF:
26363 case V2DF_FTYPE_V4SI:
26364 case V2DF_FTYPE_V4DF:
26365 case V2DF_FTYPE_V4SF:
26366 case V2DF_FTYPE_V2SI:
26367 case V2SI_FTYPE_V2SI:
26368 case V2SI_FTYPE_V4SF:
26369 case V2SI_FTYPE_V2SF:
26370 case V2SI_FTYPE_V2DF:
26371 case V2SF_FTYPE_V2SF:
26372 case V2SF_FTYPE_V2SI:
26373 nargs = 1;
26374 break;
26375 case V4SF_FTYPE_V4SF_VEC_MERGE:
26376 case V2DF_FTYPE_V2DF_VEC_MERGE:
26377 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26378 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26379 case V16QI_FTYPE_V16QI_V16QI:
26380 case V16QI_FTYPE_V8HI_V8HI:
26381 case V8QI_FTYPE_V8QI_V8QI:
26382 case V8QI_FTYPE_V4HI_V4HI:
26383 case V8HI_FTYPE_V8HI_V8HI:
26384 case V8HI_FTYPE_V16QI_V16QI:
26385 case V8HI_FTYPE_V4SI_V4SI:
26386 case V8SF_FTYPE_V8SF_V8SF:
26387 case V8SF_FTYPE_V8SF_V8SI:
26388 case V4SI_FTYPE_V4SI_V4SI:
26389 case V4SI_FTYPE_V8HI_V8HI:
26390 case V4SI_FTYPE_V4SF_V4SF:
26391 case V4SI_FTYPE_V2DF_V2DF:
26392 case V4HI_FTYPE_V4HI_V4HI:
26393 case V4HI_FTYPE_V8QI_V8QI:
26394 case V4HI_FTYPE_V2SI_V2SI:
26395 case V4DF_FTYPE_V4DF_V4DF:
26396 case V4DF_FTYPE_V4DF_V4DI:
26397 case V4SF_FTYPE_V4SF_V4SF:
26398 case V4SF_FTYPE_V4SF_V4SI:
26399 case V4SF_FTYPE_V4SF_V2SI:
26400 case V4SF_FTYPE_V4SF_V2DF:
26401 case V4SF_FTYPE_V4SF_DI:
26402 case V4SF_FTYPE_V4SF_SI:
26403 case V2DI_FTYPE_V2DI_V2DI:
26404 case V2DI_FTYPE_V16QI_V16QI:
26405 case V2DI_FTYPE_V4SI_V4SI:
26406 case V2DI_FTYPE_V2DI_V16QI:
26407 case V2DI_FTYPE_V2DF_V2DF:
26408 case V2SI_FTYPE_V2SI_V2SI:
26409 case V2SI_FTYPE_V4HI_V4HI:
26410 case V2SI_FTYPE_V2SF_V2SF:
26411 case V2DF_FTYPE_V2DF_V2DF:
26412 case V2DF_FTYPE_V2DF_V4SF:
26413 case V2DF_FTYPE_V2DF_V2DI:
26414 case V2DF_FTYPE_V2DF_DI:
26415 case V2DF_FTYPE_V2DF_SI:
26416 case V2SF_FTYPE_V2SF_V2SF:
26417 case V1DI_FTYPE_V1DI_V1DI:
26418 case V1DI_FTYPE_V8QI_V8QI:
26419 case V1DI_FTYPE_V2SI_V2SI:
26420 if (comparison == UNKNOWN)
26421 return ix86_expand_binop_builtin (icode, exp, target);
26422 nargs = 2;
26423 break;
26424 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26425 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26426 gcc_assert (comparison != UNKNOWN);
26427 nargs = 2;
26428 swap = true;
26429 break;
26430 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26431 case V8HI_FTYPE_V8HI_SI_COUNT:
26432 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26433 case V4SI_FTYPE_V4SI_SI_COUNT:
26434 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26435 case V4HI_FTYPE_V4HI_SI_COUNT:
26436 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26437 case V2DI_FTYPE_V2DI_SI_COUNT:
26438 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26439 case V2SI_FTYPE_V2SI_SI_COUNT:
26440 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26441 case V1DI_FTYPE_V1DI_SI_COUNT:
26442 nargs = 2;
26443 last_arg_count = true;
26444 break;
26445 case UINT64_FTYPE_UINT64_UINT64:
26446 case UINT_FTYPE_UINT_UINT:
26447 case UINT_FTYPE_UINT_USHORT:
26448 case UINT_FTYPE_UINT_UCHAR:
26449 case UINT16_FTYPE_UINT16_INT:
26450 case UINT8_FTYPE_UINT8_INT:
26451 nargs = 2;
26452 break;
26453 case V2DI_FTYPE_V2DI_INT_CONVERT:
26454 nargs = 2;
26455 rmode = V1TImode;
26456 nargs_constant = 1;
26457 break;
26458 case V8HI_FTYPE_V8HI_INT:
26459 case V8HI_FTYPE_V8SF_INT:
26460 case V8HI_FTYPE_V4SF_INT:
26461 case V8SF_FTYPE_V8SF_INT:
26462 case V4SI_FTYPE_V4SI_INT:
26463 case V4SI_FTYPE_V8SI_INT:
26464 case V4HI_FTYPE_V4HI_INT:
26465 case V4DF_FTYPE_V4DF_INT:
26466 case V4SF_FTYPE_V4SF_INT:
26467 case V4SF_FTYPE_V8SF_INT:
26468 case V2DI_FTYPE_V2DI_INT:
26469 case V2DF_FTYPE_V2DF_INT:
26470 case V2DF_FTYPE_V4DF_INT:
26471 nargs = 2;
26472 nargs_constant = 1;
26473 break;
26474 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26475 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26476 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26477 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26478 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26479 nargs = 3;
26480 break;
26481 case V16QI_FTYPE_V16QI_V16QI_INT:
26482 case V8HI_FTYPE_V8HI_V8HI_INT:
26483 case V8SI_FTYPE_V8SI_V8SI_INT:
26484 case V8SI_FTYPE_V8SI_V4SI_INT:
26485 case V8SF_FTYPE_V8SF_V8SF_INT:
26486 case V8SF_FTYPE_V8SF_V4SF_INT:
26487 case V4SI_FTYPE_V4SI_V4SI_INT:
26488 case V4DF_FTYPE_V4DF_V4DF_INT:
26489 case V4DF_FTYPE_V4DF_V2DF_INT:
26490 case V4SF_FTYPE_V4SF_V4SF_INT:
26491 case V2DI_FTYPE_V2DI_V2DI_INT:
26492 case V2DF_FTYPE_V2DF_V2DF_INT:
26493 nargs = 3;
26494 nargs_constant = 1;
26495 break;
26496 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26497 nargs = 3;
26498 rmode = V2DImode;
26499 nargs_constant = 1;
26500 break;
26501 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26502 nargs = 3;
26503 rmode = DImode;
26504 nargs_constant = 1;
26505 break;
26506 case V2DI_FTYPE_V2DI_UINT_UINT:
26507 nargs = 3;
26508 nargs_constant = 2;
26509 break;
26510 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26511 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26512 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26513 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26514 nargs = 4;
26515 nargs_constant = 1;
26516 break;
26517 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26518 nargs = 4;
26519 nargs_constant = 2;
26520 break;
26521 default:
26522 gcc_unreachable ();
26525 gcc_assert (nargs <= ARRAY_SIZE (args));
26527 if (comparison != UNKNOWN)
26529 gcc_assert (nargs == 2);
26530 return ix86_expand_sse_compare (d, exp, target, swap);
26533 if (rmode == VOIDmode || rmode == tmode)
26535 if (optimize
26536 || target == 0
26537 || GET_MODE (target) != tmode
26538 || !insn_p->operand[0].predicate (target, tmode))
26539 target = gen_reg_rtx (tmode);
26540 real_target = target;
26542 else
26544 target = gen_reg_rtx (rmode);
26545 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26548 for (i = 0; i < nargs; i++)
26550 tree arg = CALL_EXPR_ARG (exp, i);
26551 rtx op = expand_normal (arg);
26552 enum machine_mode mode = insn_p->operand[i + 1].mode;
26553 bool match = insn_p->operand[i + 1].predicate (op, mode);
26555 if (last_arg_count && (i + 1) == nargs)
26557 /* SIMD shift insns take either an 8-bit immediate or a
26558 register as the count.  But the builtin functions take int as
26559 the count.  If the count doesn't match, put it in a register.  */
26560 if (!match)
26562 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26563 if (!insn_p->operand[i + 1].predicate (op, mode))
26564 op = copy_to_reg (op);
26567 else if ((nargs - i) <= nargs_constant)
26569 if (!match)
26570 switch (icode)
26572 case CODE_FOR_sse4_1_roundpd:
26573 case CODE_FOR_sse4_1_roundps:
26574 case CODE_FOR_sse4_1_roundsd:
26575 case CODE_FOR_sse4_1_roundss:
26576 case CODE_FOR_sse4_1_blendps:
26577 case CODE_FOR_avx_blendpd256:
26578 case CODE_FOR_avx_vpermilv4df:
26579 case CODE_FOR_avx_roundpd256:
26580 case CODE_FOR_avx_roundps256:
26581 error ("the last argument must be a 4-bit immediate");
26582 return const0_rtx;
26584 case CODE_FOR_sse4_1_blendpd:
26585 case CODE_FOR_avx_vpermilv2df:
26586 case CODE_FOR_xop_vpermil2v2df3:
26587 case CODE_FOR_xop_vpermil2v4sf3:
26588 case CODE_FOR_xop_vpermil2v4df3:
26589 case CODE_FOR_xop_vpermil2v8sf3:
26590 error ("the last argument must be a 2-bit immediate");
26591 return const0_rtx;
26593 case CODE_FOR_avx_vextractf128v4df:
26594 case CODE_FOR_avx_vextractf128v8sf:
26595 case CODE_FOR_avx_vextractf128v8si:
26596 case CODE_FOR_avx_vinsertf128v4df:
26597 case CODE_FOR_avx_vinsertf128v8sf:
26598 case CODE_FOR_avx_vinsertf128v8si:
26599 error ("the last argument must be a 1-bit immediate");
26600 return const0_rtx;
26602 case CODE_FOR_avx_cmpsdv2df3:
26603 case CODE_FOR_avx_cmpssv4sf3:
26604 case CODE_FOR_avx_cmppdv2df3:
26605 case CODE_FOR_avx_cmppsv4sf3:
26606 case CODE_FOR_avx_cmppdv4df3:
26607 case CODE_FOR_avx_cmppsv8sf3:
26608 error ("the last argument must be a 5-bit immediate");
26609 return const0_rtx;
26611 default:
26612 switch (nargs_constant)
26614 case 2:
26615 if ((nargs - i) == nargs_constant)
26617 error ("the next to last argument must be an 8-bit immediate");
26618 break;
26620 case 1:
26621 error ("the last argument must be an 8-bit immediate");
26622 break;
26623 default:
26624 gcc_unreachable ();
26626 return const0_rtx;
26629 else
26631 if (VECTOR_MODE_P (mode))
26632 op = safe_vector_operand (op, mode);
26634 /* If we aren't optimizing, only allow one memory operand to
26635 be generated. */
26636 if (memory_operand (op, mode))
26637 num_memory++;
26639 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
26641 if (optimize || !match || num_memory > 1)
26642 op = copy_to_mode_reg (mode, op);
26644 else
26646 op = copy_to_reg (op);
26647 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
26651 args[i].op = op;
26652 args[i].mode = mode;
26655 switch (nargs)
26657 case 1:
26658 pat = GEN_FCN (icode) (real_target, args[0].op);
26659 break;
26660 case 2:
26661 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
26662 break;
26663 case 3:
26664 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26665 args[2].op);
26666 break;
26667 case 4:
26668 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26669 args[2].op, args[3].op);
26670 break;
26671 default:
26672 gcc_unreachable ();
26675 if (! pat)
26676 return 0;
26678 emit_insn (pat);
26679 return target;
26682 /* Subroutine of ix86_expand_builtin to take care of special insns
26683 with variable number of operands. */
26685 static rtx
26686 ix86_expand_special_args_builtin (const struct builtin_description *d,
26687 tree exp, rtx target)
26689 tree arg;
26690 rtx pat, op;
26691 unsigned int i, nargs, arg_adjust, memory;
26692 struct
26694 rtx op;
26695 enum machine_mode mode;
26696 } args[3];
26697 enum insn_code icode = d->icode;
26698 bool last_arg_constant = false;
26699 const struct insn_data_d *insn_p = &insn_data[icode];
26700 enum machine_mode tmode = insn_p->operand[0].mode;
26701 enum { load, store } klass;
26703 switch ((enum ix86_builtin_func_type) d->flag)
26705 case VOID_FTYPE_VOID:
26706 if (icode == CODE_FOR_avx_vzeroupper)
26707 target = GEN_INT (vzeroupper_intrinsic);
26708 emit_insn (GEN_FCN (icode) (target));
26709 return 0;
26710 case VOID_FTYPE_UINT64:
26711 case VOID_FTYPE_UNSIGNED:
26712 nargs = 0;
26713 klass = store;
26714 memory = 0;
26715 break;
26717 case UINT64_FTYPE_VOID:
26718 case UNSIGNED_FTYPE_VOID:
26719 nargs = 0;
26720 klass = load;
26721 memory = 0;
26722 break;
26723 case UINT64_FTYPE_PUNSIGNED:
26724 case V2DI_FTYPE_PV2DI:
26725 case V32QI_FTYPE_PCCHAR:
26726 case V16QI_FTYPE_PCCHAR:
26727 case V8SF_FTYPE_PCV4SF:
26728 case V8SF_FTYPE_PCFLOAT:
26729 case V4SF_FTYPE_PCFLOAT:
26730 case V4DF_FTYPE_PCV2DF:
26731 case V4DF_FTYPE_PCDOUBLE:
26732 case V2DF_FTYPE_PCDOUBLE:
26733 case VOID_FTYPE_PVOID:
26734 nargs = 1;
26735 klass = load;
26736 memory = 0;
26737 break;
26738 case VOID_FTYPE_PV2SF_V4SF:
26739 case VOID_FTYPE_PV4DI_V4DI:
26740 case VOID_FTYPE_PV2DI_V2DI:
26741 case VOID_FTYPE_PCHAR_V32QI:
26742 case VOID_FTYPE_PCHAR_V16QI:
26743 case VOID_FTYPE_PFLOAT_V8SF:
26744 case VOID_FTYPE_PFLOAT_V4SF:
26745 case VOID_FTYPE_PDOUBLE_V4DF:
26746 case VOID_FTYPE_PDOUBLE_V2DF:
26747 case VOID_FTYPE_PULONGLONG_ULONGLONG:
26748 case VOID_FTYPE_PINT_INT:
26749 nargs = 1;
26750 klass = store;
26751 /* Reserve memory operand for target. */
26752 memory = ARRAY_SIZE (args);
26753 break;
26754 case V4SF_FTYPE_V4SF_PCV2SF:
26755 case V2DF_FTYPE_V2DF_PCDOUBLE:
26756 nargs = 2;
26757 klass = load;
26758 memory = 1;
26759 break;
26760 case V8SF_FTYPE_PCV8SF_V8SF:
26761 case V4DF_FTYPE_PCV4DF_V4DF:
26762 case V4SF_FTYPE_PCV4SF_V4SF:
26763 case V2DF_FTYPE_PCV2DF_V2DF:
26764 nargs = 2;
26765 klass = load;
26766 memory = 0;
26767 break;
26768 case VOID_FTYPE_PV8SF_V8SF_V8SF:
26769 case VOID_FTYPE_PV4DF_V4DF_V4DF:
26770 case VOID_FTYPE_PV4SF_V4SF_V4SF:
26771 case VOID_FTYPE_PV2DF_V2DF_V2DF:
26772 nargs = 2;
26773 klass = store;
26774 /* Reserve memory operand for target. */
26775 memory = ARRAY_SIZE (args);
26776 break;
26777 case VOID_FTYPE_UINT_UINT_UINT:
26778 case VOID_FTYPE_UINT64_UINT_UINT:
26779 case UCHAR_FTYPE_UINT_UINT_UINT:
26780 case UCHAR_FTYPE_UINT64_UINT_UINT:
26781 nargs = 3;
26782 klass = load;
26783 memory = ARRAY_SIZE (args);
26784 last_arg_constant = true;
26785 break;
26786 default:
26787 gcc_unreachable ();
26790 gcc_assert (nargs <= ARRAY_SIZE (args));
26792 if (klass == store)
26794 arg = CALL_EXPR_ARG (exp, 0);
26795 op = expand_normal (arg);
26796 gcc_assert (target == 0);
26797 if (memory)
26798 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
26799 else
26800 target = force_reg (tmode, op);
26801 arg_adjust = 1;
26803 else
26805 arg_adjust = 0;
26806 if (optimize
26807 || target == 0
26808 || GET_MODE (target) != tmode
26809 || !insn_p->operand[0].predicate (target, tmode))
26810 target = gen_reg_rtx (tmode);
26813 for (i = 0; i < nargs; i++)
26815 enum machine_mode mode = insn_p->operand[i + 1].mode;
26816 bool match;
26818 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
26819 op = expand_normal (arg);
26820 match = insn_p->operand[i + 1].predicate (op, mode);
26822 if (last_arg_constant && (i + 1) == nargs)
26824 if (!match)
26826 if (icode == CODE_FOR_lwp_lwpvalsi3
26827 || icode == CODE_FOR_lwp_lwpinssi3
26828 || icode == CODE_FOR_lwp_lwpvaldi3
26829 || icode == CODE_FOR_lwp_lwpinsdi3)
26830 error ("the last argument must be a 32-bit immediate");
26831 else
26832 error ("the last argument must be an 8-bit immediate");
26833 return const0_rtx;
26836 else
26838 if (i == memory)
26840 /* This must be the memory operand. */
26841 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
26842 gcc_assert (GET_MODE (op) == mode
26843 || GET_MODE (op) == VOIDmode);
26845 else
26847 /* This must be a register. */
26848 if (VECTOR_MODE_P (mode))
26849 op = safe_vector_operand (op, mode);
26851 gcc_assert (GET_MODE (op) == mode
26852 || GET_MODE (op) == VOIDmode);
26853 op = copy_to_mode_reg (mode, op);
26857 args[i].op = op;
26858 args[i].mode = mode;
26861 switch (nargs)
26863 case 0:
26864 pat = GEN_FCN (icode) (target);
26865 break;
26866 case 1:
26867 pat = GEN_FCN (icode) (target, args[0].op);
26868 break;
26869 case 2:
26870 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26871 break;
26872 case 3:
26873 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26874 break;
26875 default:
26876 gcc_unreachable ();
26879 if (! pat)
26880 return 0;
26881 emit_insn (pat);
26882 return klass == store ? 0 : target;
26885 /* Return the integer constant in ARG. Constrain it to be in the range
26886 of the subparts of VEC_TYPE; issue an error if not. */
26888 static int
26889 get_element_number (tree vec_type, tree arg)
26891 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
26893 if (!host_integerp (arg, 1)
26894 || (elt = tree_low_cst (arg, 1), elt > max))
26896 error ("selector must be an integer constant in the range 0..%wi", max);
26897 return 0;
26900 return elt;
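/* Worked example: for an argument of type __v4sf, TYPE_VECTOR_SUBPARTS
   is 4, so MAX is 3 and the valid selectors are 0..3.  A call such as
   __builtin_ia32_vec_ext_v4sf (x, 2) extracts element 2, while a
   selector of 4 triggers the error above and 0 is returned for error
   recovery.  */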
26903 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26904 ix86_expand_vector_init. We DO have language-level syntax for this, in
26905 the form of (type){ init-list }. Except that since we can't place emms
26906 instructions from inside the compiler, we can't allow the use of MMX
26907 registers unless the user explicitly asks for it. So we do *not* define
26908 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
26909 we have builtins invoked by mmintrin.h that give us license to emit
26910 these sorts of instructions. */
26912 static rtx
26913 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
26915 enum machine_mode tmode = TYPE_MODE (type);
26916 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
26917 int i, n_elt = GET_MODE_NUNITS (tmode);
26918 rtvec v = rtvec_alloc (n_elt);
26920 gcc_assert (VECTOR_MODE_P (tmode));
26921 gcc_assert (call_expr_nargs (exp) == n_elt);
26923 for (i = 0; i < n_elt; ++i)
26925 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
26926 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
26929 if (!target || !register_operand (target, tmode))
26930 target = gen_reg_rtx (tmode);
26932 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
26933 return target;
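/* For instance, the MMX intrinsic _mm_set_pi32 in <mmintrin.h> is
   implemented on top of __builtin_ia32_vec_init_v2si and therefore
   reaches this expander; see the comment above for why no vec_init
   patterns are provided for MMX modes in mmx.md.  */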
26936 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26937 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
26938 had a language-level syntax for referencing vector elements. */
26940 static rtx
26941 ix86_expand_vec_ext_builtin (tree exp, rtx target)
26943 enum machine_mode tmode, mode0;
26944 tree arg0, arg1;
26945 int elt;
26946 rtx op0;
26948 arg0 = CALL_EXPR_ARG (exp, 0);
26949 arg1 = CALL_EXPR_ARG (exp, 1);
26951 op0 = expand_normal (arg0);
26952 elt = get_element_number (TREE_TYPE (arg0), arg1);
26954 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26955 mode0 = TYPE_MODE (TREE_TYPE (arg0));
26956 gcc_assert (VECTOR_MODE_P (mode0));
26958 op0 = force_reg (mode0, op0);
26960 if (optimize || !target || !register_operand (target, tmode))
26961 target = gen_reg_rtx (tmode);
26963 ix86_expand_vector_extract (true, target, op0, elt);
26965 return target;
26968 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26969 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
26970 a language-level syntax for referencing vector elements. */
26972 static rtx
26973 ix86_expand_vec_set_builtin (tree exp)
26975 enum machine_mode tmode, mode1;
26976 tree arg0, arg1, arg2;
26977 int elt;
26978 rtx op0, op1, target;
26980 arg0 = CALL_EXPR_ARG (exp, 0);
26981 arg1 = CALL_EXPR_ARG (exp, 1);
26982 arg2 = CALL_EXPR_ARG (exp, 2);
26984 tmode = TYPE_MODE (TREE_TYPE (arg0));
26985 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26986 gcc_assert (VECTOR_MODE_P (tmode));
26988 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
26989 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
26990 elt = get_element_number (TREE_TYPE (arg0), arg2);
26992 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
26993 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
26995 op0 = force_reg (tmode, op0);
26996 op1 = force_reg (mode1, op1);
26998 /* OP0 is the source of these builtin functions and shouldn't be
26999 modified. Create a copy, use it and return it as target. */
27000 target = gen_reg_rtx (tmode);
27001 emit_move_insn (target, op0);
27002 ix86_expand_vector_set (true, target, op1, elt);
27004 return target;
27007 /* Expand an expression EXP that calls a built-in function,
27008 with result going to TARGET if that's convenient
27009 (and in mode MODE if that's convenient).
27010 SUBTARGET may be used as the target for computing one of EXP's operands.
27011 IGNORE is nonzero if the value is to be ignored. */
27013 static rtx
27014 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
27015 enum machine_mode mode ATTRIBUTE_UNUSED,
27016 int ignore ATTRIBUTE_UNUSED)
27018 const struct builtin_description *d;
27019 size_t i;
27020 enum insn_code icode;
27021 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
27022 tree arg0, arg1, arg2;
27023 rtx op0, op1, op2, pat;
27024 enum machine_mode mode0, mode1, mode2;
27025 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
27027 /* Determine whether the builtin function is available under the current ISA.
27028 Originally the builtin was not created if it wasn't applicable to the
27029 current ISA based on the command line switches. With function specific
27030 options, we need to check in the context of the function making the call
27031 whether it is supported. */
27032 if (ix86_builtins_isa[fcode].isa
27033 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
27035 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
27036 NULL, NULL, false);
27038 if (!opts)
27039 error ("%qE needs unknown isa option", fndecl);
27040 else
27042 gcc_assert (opts != NULL);
27043 error ("%qE needs isa option %s", fndecl, opts);
27044 free (opts);
27046 return const0_rtx;
27049 switch (fcode)
27051 case IX86_BUILTIN_MASKMOVQ:
27052 case IX86_BUILTIN_MASKMOVDQU:
27053 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27054 ? CODE_FOR_mmx_maskmovq
27055 : CODE_FOR_sse2_maskmovdqu);
27056 /* Note the arg order is different from the operand order. */
27057 arg1 = CALL_EXPR_ARG (exp, 0);
27058 arg2 = CALL_EXPR_ARG (exp, 1);
27059 arg0 = CALL_EXPR_ARG (exp, 2);
27060 op0 = expand_normal (arg0);
27061 op1 = expand_normal (arg1);
27062 op2 = expand_normal (arg2);
27063 mode0 = insn_data[icode].operand[0].mode;
27064 mode1 = insn_data[icode].operand[1].mode;
27065 mode2 = insn_data[icode].operand[2].mode;
27067 op0 = force_reg (Pmode, op0);
27068 op0 = gen_rtx_MEM (mode1, op0);
27070 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27071 op0 = copy_to_mode_reg (mode0, op0);
27072 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27073 op1 = copy_to_mode_reg (mode1, op1);
27074 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27075 op2 = copy_to_mode_reg (mode2, op2);
27076 pat = GEN_FCN (icode) (op0, op1, op2);
27077 if (! pat)
27078 return 0;
27079 emit_insn (pat);
27080 return 0;
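/* For example, _mm_maskmove_si64 (d, n, p) from <mmintrin.h> calls
   __builtin_ia32_maskmovq (d, n, p): the address p is the builtin's
   third argument but becomes operand 0 (the memory destination) of the
   maskmovq pattern, hence the argument reshuffling above.  */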
27082 case IX86_BUILTIN_LDMXCSR:
27083 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27084 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27085 emit_move_insn (target, op0);
27086 emit_insn (gen_sse_ldmxcsr (target));
27087 return 0;
27089 case IX86_BUILTIN_STMXCSR:
27090 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27091 emit_insn (gen_sse_stmxcsr (target));
27092 return copy_to_mode_reg (SImode, target);
27094 case IX86_BUILTIN_CLFLUSH:
27095 arg0 = CALL_EXPR_ARG (exp, 0);
27096 op0 = expand_normal (arg0);
27097 icode = CODE_FOR_sse2_clflush;
27098 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27099 op0 = copy_to_mode_reg (Pmode, op0);
27101 emit_insn (gen_sse2_clflush (op0));
27102 return 0;
27104 case IX86_BUILTIN_MONITOR:
27105 arg0 = CALL_EXPR_ARG (exp, 0);
27106 arg1 = CALL_EXPR_ARG (exp, 1);
27107 arg2 = CALL_EXPR_ARG (exp, 2);
27108 op0 = expand_normal (arg0);
27109 op1 = expand_normal (arg1);
27110 op2 = expand_normal (arg2);
27111 if (!REG_P (op0))
27112 op0 = copy_to_mode_reg (Pmode, op0);
27113 if (!REG_P (op1))
27114 op1 = copy_to_mode_reg (SImode, op1);
27115 if (!REG_P (op2))
27116 op2 = copy_to_mode_reg (SImode, op2);
27117 emit_insn (ix86_gen_monitor (op0, op1, op2));
27118 return 0;
27120 case IX86_BUILTIN_MWAIT:
27121 arg0 = CALL_EXPR_ARG (exp, 0);
27122 arg1 = CALL_EXPR_ARG (exp, 1);
27123 op0 = expand_normal (arg0);
27124 op1 = expand_normal (arg1);
27125 if (!REG_P (op0))
27126 op0 = copy_to_mode_reg (SImode, op0);
27127 if (!REG_P (op1))
27128 op1 = copy_to_mode_reg (SImode, op1);
27129 emit_insn (gen_sse3_mwait (op0, op1));
27130 return 0;
27132 case IX86_BUILTIN_VEC_INIT_V2SI:
27133 case IX86_BUILTIN_VEC_INIT_V4HI:
27134 case IX86_BUILTIN_VEC_INIT_V8QI:
27135 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27137 case IX86_BUILTIN_VEC_EXT_V2DF:
27138 case IX86_BUILTIN_VEC_EXT_V2DI:
27139 case IX86_BUILTIN_VEC_EXT_V4SF:
27140 case IX86_BUILTIN_VEC_EXT_V4SI:
27141 case IX86_BUILTIN_VEC_EXT_V8HI:
27142 case IX86_BUILTIN_VEC_EXT_V2SI:
27143 case IX86_BUILTIN_VEC_EXT_V4HI:
27144 case IX86_BUILTIN_VEC_EXT_V16QI:
27145 return ix86_expand_vec_ext_builtin (exp, target);
27147 case IX86_BUILTIN_VEC_SET_V2DI:
27148 case IX86_BUILTIN_VEC_SET_V4SF:
27149 case IX86_BUILTIN_VEC_SET_V4SI:
27150 case IX86_BUILTIN_VEC_SET_V8HI:
27151 case IX86_BUILTIN_VEC_SET_V4HI:
27152 case IX86_BUILTIN_VEC_SET_V16QI:
27153 return ix86_expand_vec_set_builtin (exp);
27155 case IX86_BUILTIN_VEC_PERM_V2DF:
27156 case IX86_BUILTIN_VEC_PERM_V4SF:
27157 case IX86_BUILTIN_VEC_PERM_V2DI:
27158 case IX86_BUILTIN_VEC_PERM_V4SI:
27159 case IX86_BUILTIN_VEC_PERM_V8HI:
27160 case IX86_BUILTIN_VEC_PERM_V16QI:
27161 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27162 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27163 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27164 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27165 case IX86_BUILTIN_VEC_PERM_V4DF:
27166 case IX86_BUILTIN_VEC_PERM_V8SF:
27167 return ix86_expand_vec_perm_builtin (exp);
27169 case IX86_BUILTIN_INFQ:
27170 case IX86_BUILTIN_HUGE_VALQ:
27172 REAL_VALUE_TYPE inf;
27173 rtx tmp;
27175 real_inf (&inf);
27176 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27178 tmp = validize_mem (force_const_mem (mode, tmp));
27180 if (target == 0)
27181 target = gen_reg_rtx (mode);
27183 emit_move_insn (target, tmp);
27184 return target;
27187 case IX86_BUILTIN_LLWPCB:
27188 arg0 = CALL_EXPR_ARG (exp, 0);
27189 op0 = expand_normal (arg0);
27190 icode = CODE_FOR_lwp_llwpcb;
27191 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27192 op0 = copy_to_mode_reg (Pmode, op0);
27193 emit_insn (gen_lwp_llwpcb (op0));
27194 return 0;
27196 case IX86_BUILTIN_SLWPCB:
27197 icode = CODE_FOR_lwp_slwpcb;
27198 if (!target
27199 || !insn_data[icode].operand[0].predicate (target, Pmode))
27200 target = gen_reg_rtx (Pmode);
27201 emit_insn (gen_lwp_slwpcb (target));
27202 return target;
27204 case IX86_BUILTIN_BEXTRI32:
27205 case IX86_BUILTIN_BEXTRI64:
27206 arg0 = CALL_EXPR_ARG (exp, 0);
27207 arg1 = CALL_EXPR_ARG (exp, 1);
27208 op0 = expand_normal (arg0);
27209 op1 = expand_normal (arg1);
27210 icode = (fcode == IX86_BUILTIN_BEXTRI32
27211 ? CODE_FOR_tbm_bextri_si
27212 : CODE_FOR_tbm_bextri_di);
27213 if (!CONST_INT_P (op1))
27215 error ("last argument must be an immediate");
27216 return const0_rtx;
27218 else
27220 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27221 unsigned char lsb_index = INTVAL (op1) & 0xFF;
27222 op1 = GEN_INT (length);
27223 op2 = GEN_INT (lsb_index);
27224 pat = GEN_FCN (icode) (target, op0, op1, op2);
27225 if (pat)
27226 emit_insn (pat);
27227 return target;
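/* Worked example (assuming the usual builtin spelling): the TBM
   immediate packs the starting bit in bits 7:0 and the field length in
   bits 15:8, so __builtin_ia32_bextri_u32 (x, 0x0804) is split above
   into length 8 and lsb_index 4 for the tbm_bextri_si pattern.  */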
27230 case IX86_BUILTIN_RDRAND16_STEP:
27231 icode = CODE_FOR_rdrandhi_1;
27232 mode0 = HImode;
27233 goto rdrand_step;
27235 case IX86_BUILTIN_RDRAND32_STEP:
27236 icode = CODE_FOR_rdrandsi_1;
27237 mode0 = SImode;
27238 goto rdrand_step;
27240 case IX86_BUILTIN_RDRAND64_STEP:
27241 icode = CODE_FOR_rdranddi_1;
27242 mode0 = DImode;
27244 rdrand_step:
27245 op0 = gen_reg_rtx (mode0);
27246 emit_insn (GEN_FCN (icode) (op0));
27248 op1 = gen_reg_rtx (SImode);
27249 emit_move_insn (op1, CONST1_RTX (SImode));
27251 /* Emit SImode conditional move. */
27252 if (mode0 == HImode)
27254 op2 = gen_reg_rtx (SImode);
27255 emit_insn (gen_zero_extendhisi2 (op2, op0));
27257 else if (mode0 == SImode)
27258 op2 = op0;
27259 else
27260 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27262 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27263 const0_rtx);
27264 emit_insn (gen_rtx_SET (VOIDmode, op1,
27265 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
27266 emit_move_insn (target, op1);
27268 arg0 = CALL_EXPR_ARG (exp, 0);
27269 op1 = expand_normal (arg0);
27270 if (!address_operand (op1, VOIDmode))
27271 op1 = copy_addr_to_reg (op1);
27272 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
27273 return target;
27275 default:
27276 break;
27279 for (i = 0, d = bdesc_special_args;
27280 i < ARRAY_SIZE (bdesc_special_args);
27281 i++, d++)
27282 if (d->code == fcode)
27283 return ix86_expand_special_args_builtin (d, exp, target);
27285 for (i = 0, d = bdesc_args;
27286 i < ARRAY_SIZE (bdesc_args);
27287 i++, d++)
27288 if (d->code == fcode)
27289 switch (fcode)
27291 case IX86_BUILTIN_FABSQ:
27292 case IX86_BUILTIN_COPYSIGNQ:
27293 if (!TARGET_SSE2)
27294 /* Emit a normal call if SSE2 isn't available. */
27295 return expand_call (exp, target, ignore);
27296 default:
27297 return ix86_expand_args_builtin (d, exp, target);
27300 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27301 if (d->code == fcode)
27302 return ix86_expand_sse_comi (d, exp, target);
27304 for (i = 0, d = bdesc_pcmpestr;
27305 i < ARRAY_SIZE (bdesc_pcmpestr);
27306 i++, d++)
27307 if (d->code == fcode)
27308 return ix86_expand_sse_pcmpestr (d, exp, target);
27310 for (i = 0, d = bdesc_pcmpistr;
27311 i < ARRAY_SIZE (bdesc_pcmpistr);
27312 i++, d++)
27313 if (d->code == fcode)
27314 return ix86_expand_sse_pcmpistr (d, exp, target);
27316 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27317 if (d->code == fcode)
27318 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27319 (enum ix86_builtin_func_type)
27320 d->flag, d->comparison);
27322 gcc_unreachable ();
27325 /* Returns a function decl for a vectorized version of the builtin function
27326 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27327 if it is not available. */
27329 static tree
27330 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27331 tree type_in)
27333 enum machine_mode in_mode, out_mode;
27334 int in_n, out_n;
27335 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27337 if (TREE_CODE (type_out) != VECTOR_TYPE
27338 || TREE_CODE (type_in) != VECTOR_TYPE
27339 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27340 return NULL_TREE;
27342 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27343 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27344 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27345 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27347 switch (fn)
27349 case BUILT_IN_SQRT:
27350 if (out_mode == DFmode && in_mode == DFmode)
27352 if (out_n == 2 && in_n == 2)
27353 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27354 else if (out_n == 4 && in_n == 4)
27355 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27357 break;
27359 case BUILT_IN_SQRTF:
27360 if (out_mode == SFmode && in_mode == SFmode)
27362 if (out_n == 4 && in_n == 4)
27363 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27364 else if (out_n == 8 && in_n == 8)
27365 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27367 break;
27369 case BUILT_IN_LRINT:
27370 if (out_mode == SImode && out_n == 4
27371 && in_mode == DFmode && in_n == 2)
27372 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27373 break;
27375 case BUILT_IN_LRINTF:
27376 if (out_mode == SImode && in_mode == SFmode)
27378 if (out_n == 4 && in_n == 4)
27379 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27380 else if (out_n == 8 && in_n == 8)
27381 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27383 break;
27385 case BUILT_IN_COPYSIGN:
27386 if (out_mode == DFmode && in_mode == DFmode)
27388 if (out_n == 2 && in_n == 2)
27389 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27390 else if (out_n == 4 && in_n == 4)
27391 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27393 break;
27395 case BUILT_IN_COPYSIGNF:
27396 if (out_mode == SFmode && in_mode == SFmode)
27398 if (out_n == 4 && in_n == 4)
27399 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27400 else if (out_n == 8 && in_n == 8)
27401 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27403 break;
27405 case BUILT_IN_FMA:
27406 if (out_mode == DFmode && in_mode == DFmode)
27408 if (out_n == 2 && in_n == 2)
27409 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27410 if (out_n == 4 && in_n == 4)
27411 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27413 break;
27415 case BUILT_IN_FMAF:
27416 if (out_mode == SFmode && in_mode == SFmode)
27418 if (out_n == 4 && in_n == 4)
27419 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27420 if (out_n == 8 && in_n == 8)
27421 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27423 break;
27425 default:
27426 break;
27429 /* Dispatch to a handler for a vectorization library. */
27430 if (ix86_veclib_handler)
27431 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27432 type_in);
27434 return NULL_TREE;
27437 /* Handler for an SVML-style interface to
27438 a library with vectorized intrinsics. */
27440 static tree
27441 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27443 char name[20];
27444 tree fntype, new_fndecl, args;
27445 unsigned arity;
27446 const char *bname;
27447 enum machine_mode el_mode, in_mode;
27448 int n, in_n;
27450 /* The SVML is suitable for unsafe math only. */
27451 if (!flag_unsafe_math_optimizations)
27452 return NULL_TREE;
27454 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27455 n = TYPE_VECTOR_SUBPARTS (type_out);
27456 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27457 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27458 if (el_mode != in_mode
27459 || n != in_n)
27460 return NULL_TREE;
27462 switch (fn)
27464 case BUILT_IN_EXP:
27465 case BUILT_IN_LOG:
27466 case BUILT_IN_LOG10:
27467 case BUILT_IN_POW:
27468 case BUILT_IN_TANH:
27469 case BUILT_IN_TAN:
27470 case BUILT_IN_ATAN:
27471 case BUILT_IN_ATAN2:
27472 case BUILT_IN_ATANH:
27473 case BUILT_IN_CBRT:
27474 case BUILT_IN_SINH:
27475 case BUILT_IN_SIN:
27476 case BUILT_IN_ASINH:
27477 case BUILT_IN_ASIN:
27478 case BUILT_IN_COSH:
27479 case BUILT_IN_COS:
27480 case BUILT_IN_ACOSH:
27481 case BUILT_IN_ACOS:
27482 if (el_mode != DFmode || n != 2)
27483 return NULL_TREE;
27484 break;
27486 case BUILT_IN_EXPF:
27487 case BUILT_IN_LOGF:
27488 case BUILT_IN_LOG10F:
27489 case BUILT_IN_POWF:
27490 case BUILT_IN_TANHF:
27491 case BUILT_IN_TANF:
27492 case BUILT_IN_ATANF:
27493 case BUILT_IN_ATAN2F:
27494 case BUILT_IN_ATANHF:
27495 case BUILT_IN_CBRTF:
27496 case BUILT_IN_SINHF:
27497 case BUILT_IN_SINF:
27498 case BUILT_IN_ASINHF:
27499 case BUILT_IN_ASINF:
27500 case BUILT_IN_COSHF:
27501 case BUILT_IN_COSF:
27502 case BUILT_IN_ACOSHF:
27503 case BUILT_IN_ACOSF:
27504 if (el_mode != SFmode || n != 4)
27505 return NULL_TREE;
27506 break;
27508 default:
27509 return NULL_TREE;
27512 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27514 if (fn == BUILT_IN_LOGF)
27515 strcpy (name, "vmlsLn4");
27516 else if (fn == BUILT_IN_LOG)
27517 strcpy (name, "vmldLn2");
27518 else if (n == 4)
27520 sprintf (name, "vmls%s", bname+10);
27521 name[strlen (name)-1] = '4';
27523 else
27524 sprintf (name, "vmld%s2", bname+10);
27526 /* Convert to uppercase. */
27527 name[4] &= ~0x20;
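/* For example, BUILT_IN_SINF ("__builtin_sinf", n == 4) yields
   "vmlsSin4" and BUILT_IN_POW ("__builtin_pow", n == 2) yields
   "vmldPow2"; BUILT_IN_LOG and BUILT_IN_LOGF are special-cased above
   because SVML spells them "Ln".  */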
27529 arity = 0;
27530 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27531 args = TREE_CHAIN (args))
27532 arity++;
27534 if (arity == 1)
27535 fntype = build_function_type_list (type_out, type_in, NULL);
27536 else
27537 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27539 /* Build a function declaration for the vectorized function. */
27540 new_fndecl = build_decl (BUILTINS_LOCATION,
27541 FUNCTION_DECL, get_identifier (name), fntype);
27542 TREE_PUBLIC (new_fndecl) = 1;
27543 DECL_EXTERNAL (new_fndecl) = 1;
27544 DECL_IS_NOVOPS (new_fndecl) = 1;
27545 TREE_READONLY (new_fndecl) = 1;
27547 return new_fndecl;
27550 /* Handler for an ACML-style interface to
27551 a library with vectorized intrinsics. */
27553 static tree
27554 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
27556 char name[20] = "__vr.._";
27557 tree fntype, new_fndecl, args;
27558 unsigned arity;
27559 const char *bname;
27560 enum machine_mode el_mode, in_mode;
27561 int n, in_n;
27563 /* The ACML is 64-bit only and suitable for unsafe math only, as
27564 it does not correctly support parts of IEEE arithmetic, such as
27565 denormals, with the required precision.  */
27566 if (!TARGET_64BIT
27567 || !flag_unsafe_math_optimizations)
27568 return NULL_TREE;
27570 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27571 n = TYPE_VECTOR_SUBPARTS (type_out);
27572 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27573 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27574 if (el_mode != in_mode
27575 || n != in_n)
27576 return NULL_TREE;
27578 switch (fn)
27580 case BUILT_IN_SIN:
27581 case BUILT_IN_COS:
27582 case BUILT_IN_EXP:
27583 case BUILT_IN_LOG:
27584 case BUILT_IN_LOG2:
27585 case BUILT_IN_LOG10:
27586 name[4] = 'd';
27587 name[5] = '2';
27588 if (el_mode != DFmode
27589 || n != 2)
27590 return NULL_TREE;
27591 break;
27593 case BUILT_IN_SINF:
27594 case BUILT_IN_COSF:
27595 case BUILT_IN_EXPF:
27596 case BUILT_IN_POWF:
27597 case BUILT_IN_LOGF:
27598 case BUILT_IN_LOG2F:
27599 case BUILT_IN_LOG10F:
27600 name[4] = 's';
27601 name[5] = '4';
27602 if (el_mode != SFmode
27603 || n != 4)
27604 return NULL_TREE;
27605 break;
27607 default:
27608 return NULL_TREE;
27611 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27612 sprintf (name + 7, "%s", bname+10);
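/* For example, BUILT_IN_SIN becomes "__vrd2_sin" and BUILT_IN_LOG10F
   becomes "__vrs4_log10f", which is the naming scheme the ACML vector
   math routines use.  */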
27614 arity = 0;
27615 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27616 args = TREE_CHAIN (args))
27617 arity++;
27619 if (arity == 1)
27620 fntype = build_function_type_list (type_out, type_in, NULL);
27621 else
27622 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27624 /* Build a function declaration for the vectorized function. */
27625 new_fndecl = build_decl (BUILTINS_LOCATION,
27626 FUNCTION_DECL, get_identifier (name), fntype);
27627 TREE_PUBLIC (new_fndecl) = 1;
27628 DECL_EXTERNAL (new_fndecl) = 1;
27629 DECL_IS_NOVOPS (new_fndecl) = 1;
27630 TREE_READONLY (new_fndecl) = 1;
27632 return new_fndecl;
27636 /* Returns a decl of a function that implements conversion of an integer vector
27637 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
27638 are the types involved when converting according to CODE.
27639 Return NULL_TREE if it is not available. */
27641 static tree
27642 ix86_vectorize_builtin_conversion (unsigned int code,
27643 tree dest_type, tree src_type)
27645 if (! TARGET_SSE2)
27646 return NULL_TREE;
27648 switch (code)
27650 case FLOAT_EXPR:
27651 switch (TYPE_MODE (src_type))
27653 case V4SImode:
27654 switch (TYPE_MODE (dest_type))
27656 case V4SFmode:
27657 return (TYPE_UNSIGNED (src_type)
27658 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
27659 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
27660 case V4DFmode:
27661 return (TYPE_UNSIGNED (src_type)
27662 ? NULL_TREE
27663 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
27664 default:
27665 return NULL_TREE;
27667 break;
27668 case V8SImode:
27669 switch (TYPE_MODE (dest_type))
27671 case V8SFmode:
27672 return (TYPE_UNSIGNED (src_type)
27673 ? NULL_TREE
27674 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
27675 default:
27676 return NULL_TREE;
27678 break;
27679 default:
27680 return NULL_TREE;
27683 case FIX_TRUNC_EXPR:
27684 switch (TYPE_MODE (dest_type))
27686 case V4SImode:
27687 switch (TYPE_MODE (src_type))
27689 case V4SFmode:
27690 return (TYPE_UNSIGNED (dest_type)
27691 ? NULL_TREE
27692 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
27693 case V4DFmode:
27694 return (TYPE_UNSIGNED (dest_type)
27695 ? NULL_TREE
27696 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
27697 default:
27698 return NULL_TREE;
27700 break;
27702 case V8SImode:
27703 switch (TYPE_MODE (src_type))
27705 case V8SFmode:
27706 return (TYPE_UNSIGNED (dest_type)
27707 ? NULL_TREE
27708 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
27709 default:
27710 return NULL_TREE;
27712 break;
27714 default:
27715 return NULL_TREE;
27718 default:
27719 return NULL_TREE;
27722 return NULL_TREE;
27725 /* Returns the decl of a target-specific builtin that implements the
27726 reciprocal of the function FN, or NULL_TREE if not available.  */
27728 static tree
27729 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
27730 bool sqrt ATTRIBUTE_UNUSED)
27732 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
27733 && flag_finite_math_only && !flag_trapping_math
27734 && flag_unsafe_math_optimizations))
27735 return NULL_TREE;
27737 if (md_fn)
27738 /* Machine dependent builtins. */
27739 switch (fn)
27741 /* Vectorized version of sqrt to rsqrt conversion. */
27742 case IX86_BUILTIN_SQRTPS_NR:
27743 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
27745 case IX86_BUILTIN_SQRTPS_NR256:
27746 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
27748 default:
27749 return NULL_TREE;
27751 else
27752 /* Normal builtins. */
27753 switch (fn)
27755 /* Sqrt to rsqrt conversion. */
27756 case BUILT_IN_SQRTF:
27757 return ix86_builtins[IX86_BUILTIN_RSQRTF];
27759 default:
27760 return NULL_TREE;
27764 /* Helper for avx_vpermilps256_operand et al. This is also used by
27765 the expansion functions to turn the parallel back into a mask.
27766 The return value is 0 for no match and the imm8+1 for a match. */
27768 int
27769 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
27771 unsigned i, nelt = GET_MODE_NUNITS (mode);
27772 unsigned mask = 0;
27773 unsigned char ipar[8];
27775 if (XVECLEN (par, 0) != (int) nelt)
27776 return 0;
27778 /* Validate that all of the elements are constants, and not totally
27779 out of range. Copy the data into an integral array to make the
27780 subsequent checks easier. */
27781 for (i = 0; i < nelt; ++i)
27783 rtx er = XVECEXP (par, 0, i);
27784 unsigned HOST_WIDE_INT ei;
27786 if (!CONST_INT_P (er))
27787 return 0;
27788 ei = INTVAL (er);
27789 if (ei >= nelt)
27790 return 0;
27791 ipar[i] = ei;
27794 switch (mode)
27796 case V4DFmode:
27797 /* In the 256-bit DFmode case, we can only move elements within
27798 a 128-bit lane. */
27799 for (i = 0; i < 2; ++i)
27801 if (ipar[i] >= 2)
27802 return 0;
27803 mask |= ipar[i] << i;
27805 for (i = 2; i < 4; ++i)
27807 if (ipar[i] < 2)
27808 return 0;
27809 mask |= (ipar[i] - 2) << i;
27811 break;
27813 case V8SFmode:
27814 /* In the 256-bit SFmode case, we have full freedom of movement
27815 within the low 128-bit lane, but the high 128-bit lane must
27816 mirror the exact same pattern. */
27817 for (i = 0; i < 4; ++i)
27818 if (ipar[i] + 4 != ipar[i + 4])
27819 return 0;
27820 nelt = 4;
27821 /* FALLTHRU */
27823 case V2DFmode:
27824 case V4SFmode:
27825 /* In the 128-bit case, we have full freedom in the placement of
27826 the elements from the source operand. */
27827 for (i = 0; i < nelt; ++i)
27828 mask |= ipar[i] << (i * (nelt / 2));
27829 break;
27831 default:
27832 gcc_unreachable ();
27835 /* Make sure success has a non-zero value by adding one. */
27836 return mask + 1;
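/* Worked example: for V4SFmode and the parallel (1 0 3 2), the loop
   above computes mask = 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1, and the
   function returns 0xb2, i.e. the vpermilps immediate plus one.  */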
27839 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
27840 the expansion functions to turn the parallel back into a mask.
27841 The return value is 0 for no match and the imm8+1 for a match. */
27843 int
27844 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
27846 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
27847 unsigned mask = 0;
27848 unsigned char ipar[8];
27850 if (XVECLEN (par, 0) != (int) nelt)
27851 return 0;
27853 /* Validate that all of the elements are constants, and not totally
27854 out of range. Copy the data into an integral array to make the
27855 subsequent checks easier. */
27856 for (i = 0; i < nelt; ++i)
27858 rtx er = XVECEXP (par, 0, i);
27859 unsigned HOST_WIDE_INT ei;
27861 if (!CONST_INT_P (er))
27862 return 0;
27863 ei = INTVAL (er);
27864 if (ei >= 2 * nelt)
27865 return 0;
27866 ipar[i] = ei;
27870 /* Validate that each half of the permute consists of consecutive elements. */
27870 for (i = 0; i < nelt2 - 1; ++i)
27871 if (ipar[i] + 1 != ipar[i + 1])
27872 return 0;
27873 for (i = nelt2; i < nelt - 1; ++i)
27874 if (ipar[i] + 1 != ipar[i + 1])
27875 return 0;
27877 /* Reconstruct the mask. */
27878 for (i = 0; i < 2; ++i)
27880 unsigned e = ipar[i * nelt2];
27881 if (e % nelt2)
27882 return 0;
27883 e /= nelt2;
27884 mask |= e << (i * 4);
27887 /* Make sure success has a non-zero value by adding one. */
27888 return mask + 1;
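/* Worked example: for V8SFmode and the parallel (0 1 2 3 8 9 10 11),
   the low half selects lane 0 of the first operand (e == 0) and the
   high half selects lane 0 of the second operand (e == 2), giving
   mask = 0x20 and a return value of 0x21.  */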
27892 /* Store OPERAND to memory after reload is completed. This means
27893 that we can't easily use assign_stack_local. */
27894 rtx
27895 ix86_force_to_memory (enum machine_mode mode, rtx operand)
27897 rtx result;
27899 gcc_assert (reload_completed);
27900 if (ix86_using_red_zone ())
27902 result = gen_rtx_MEM (mode,
27903 gen_rtx_PLUS (Pmode,
27904 stack_pointer_rtx,
27905 GEN_INT (-RED_ZONE_SIZE)));
27906 emit_move_insn (result, operand);
27908 else if (TARGET_64BIT)
27910 switch (mode)
27912 case HImode:
27913 case SImode:
27914 operand = gen_lowpart (DImode, operand);
27915 /* FALLTHRU */
27916 case DImode:
27917 emit_insn (
27918 gen_rtx_SET (VOIDmode,
27919 gen_rtx_MEM (DImode,
27920 gen_rtx_PRE_DEC (DImode,
27921 stack_pointer_rtx)),
27922 operand));
27923 break;
27924 default:
27925 gcc_unreachable ();
27927 result = gen_rtx_MEM (mode, stack_pointer_rtx);
27929 else
27931 switch (mode)
27933 case DImode:
27935 rtx operands[2];
27936 split_double_mode (mode, &operand, 1, operands, operands + 1);
27937 emit_insn (
27938 gen_rtx_SET (VOIDmode,
27939 gen_rtx_MEM (SImode,
27940 gen_rtx_PRE_DEC (Pmode,
27941 stack_pointer_rtx)),
27942 operands[1]));
27943 emit_insn (
27944 gen_rtx_SET (VOIDmode,
27945 gen_rtx_MEM (SImode,
27946 gen_rtx_PRE_DEC (Pmode,
27947 stack_pointer_rtx)),
27948 operands[0]));
27950 break;
27951 case HImode:
27952 /* Store HImodes as SImodes. */
27953 operand = gen_lowpart (SImode, operand);
27954 /* FALLTHRU */
27955 case SImode:
27956 emit_insn (
27957 gen_rtx_SET (VOIDmode,
27958 gen_rtx_MEM (GET_MODE (operand),
27959 gen_rtx_PRE_DEC (SImode,
27960 stack_pointer_rtx)),
27961 operand));
27962 break;
27963 default:
27964 gcc_unreachable ();
27966 result = gen_rtx_MEM (mode, stack_pointer_rtx);
27968 return result;
27971 /* Free the operand from memory. */
27972 void
27973 ix86_free_from_memory (enum machine_mode mode)
27975 if (!ix86_using_red_zone ())
27977 int size;
27979 if (mode == DImode || TARGET_64BIT)
27980 size = 8;
27981 else
27982 size = 4;
27983 /* Use LEA to deallocate stack space. In peephole2 it will be converted
27984 to pop or add instruction if registers are available. */
27985 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27986 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
27987 GEN_INT (size))));
27991 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
27992 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
27993 same. */
27994 static const reg_class_t *
27995 i386_ira_cover_classes (void)
27997 static const reg_class_t sse_fpmath_classes[] = {
27998 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
28000 static const reg_class_t no_sse_fpmath_classes[] = {
28001 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
28004 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
28007 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28009 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28010 QImode must go into class Q_REGS.
28011 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
28012 movdf to do mem-to-mem moves through integer regs. */
28014 static reg_class_t
28015 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28017 enum machine_mode mode = GET_MODE (x);
28019 /* We're only allowed to return a subclass of CLASS. Many of the
28020 following checks fail for NO_REGS, so eliminate that early. */
28021 if (regclass == NO_REGS)
28022 return NO_REGS;
28024 /* All classes can load zeros. */
28025 if (x == CONST0_RTX (mode))
28026 return regclass;
28028 /* Force constants into memory if we are loading a (nonzero) constant into
28029 an MMX or SSE register. This is because there are no MMX/SSE instructions
28030 to load from a constant. */
28031 if (CONSTANT_P (x)
28032 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28033 return NO_REGS;
28035 /* Prefer SSE regs only, if we can use them for math. */
28036 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28037 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28039 /* Floating-point constants need more complex checks. */
28040 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28042 /* General regs can load everything. */
28043 if (reg_class_subset_p (regclass, GENERAL_REGS))
28044 return regclass;
28046 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28047 zero above. We only want to wind up preferring 80387 registers if
28048 we plan on doing computation with them. */
28049 if (TARGET_80387
28050 && standard_80387_constant_p (x))
28052 /* Limit class to non-sse. */
28053 if (regclass == FLOAT_SSE_REGS)
28054 return FLOAT_REGS;
28055 if (regclass == FP_TOP_SSE_REGS)
28056 return FP_TOP_REG;
28057 if (regclass == FP_SECOND_SSE_REGS)
28058 return FP_SECOND_REG;
28059 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
28060 return regclass;
28063 return NO_REGS;
28066 /* Generally when we see PLUS here, it's the function invariant
28067 (plus soft-fp const_int), which can only be computed into general
28068 regs.  */
28069 if (GET_CODE (x) == PLUS)
28070 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28072 /* QImode constants are easy to load, but non-constant QImode data
28073 must go into Q_REGS. */
28074 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
28076 if (reg_class_subset_p (regclass, Q_REGS))
28077 return regclass;
28078 if (reg_class_subset_p (Q_REGS, regclass))
28079 return Q_REGS;
28080 return NO_REGS;
28083 return regclass;
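/* Illustrative consequences of the rules above (not exhaustive): any nonzero
   constant headed for a class containing SSE or MMX registers is answered
   with NO_REGS, so the constant ends up in the constant pool; with
   -mfpmath=387 the SFmode constant 1.0 keeps FLOAT_REGS because
   standard_80387_constant_p recognizes it (fld1), while an arbitrary FP
   constant falls through to NO_REGS; and a non-constant QImode value asked
   for a class wider than Q_REGS is narrowed to Q_REGS.  */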
28086 /* Discourage putting floating-point values in SSE registers unless
28087 SSE math is being used, and likewise for the 387 registers. */
28088 static reg_class_t
28089 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28091 enum machine_mode mode = GET_MODE (x);
28093 /* Restrict the output reload class to the register bank that we are doing
28094 math on. If we would like not to return a subset of CLASS, reject this
28095 alternative: if reload cannot do this, it will still use its choice. */
28096 mode = GET_MODE (x);
28097 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28098 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28100 if (X87_FLOAT_MODE_P (mode))
28102 if (regclass == FP_TOP_SSE_REGS)
28103 return FP_TOP_REG;
28104 else if (regclass == FP_SECOND_SSE_REGS)
28105 return FP_SECOND_REG;
28106 else
28107 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28110 return regclass;
28113 static reg_class_t
28114 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28115 enum machine_mode mode,
28116 secondary_reload_info *sri ATTRIBUTE_UNUSED)
28118 /* QImode spills from non-QI registers require
28119 intermediate register on 32bit targets. */
28120 if (!in_p && mode == QImode && !TARGET_64BIT
28121 && (rclass == GENERAL_REGS
28122 || rclass == LEGACY_REGS
28123 || rclass == INDEX_REGS))
28125 int regno;
28127 if (REG_P (x))
28128 regno = REGNO (x);
28129 else
28130 regno = -1;
28132 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28133 regno = true_regnum (x);
28135 /* Return Q_REGS if the operand is in memory. */
28136 if (regno == -1)
28137 return Q_REGS;
28140 return NO_REGS;
28143 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28145 static bool
28146 ix86_class_likely_spilled_p (reg_class_t rclass)
28148 switch (rclass)
28150 case AREG:
28151 case DREG:
28152 case CREG:
28153 case BREG:
28154 case AD_REGS:
28155 case SIREG:
28156 case DIREG:
28157 case SSE_FIRST_REG:
28158 case FP_TOP_REG:
28159 case FP_SECOND_REG:
28160 return true;
28162 default:
28163 break;
28166 return false;
28169 /* If we are copying between general and FP registers, we need a memory
28170 location. The same is true for SSE and MMX registers.
28172 To optimize register_move_cost performance, allow inline variant.
28174 The macro can't work reliably when one of the CLASSES is a class containing
28175 registers from multiple units (SSE, MMX, integer). We avoid this by never
28176 combining those units in a single alternative in the machine description.
28177 Ensure that this constraint holds to avoid unexpected surprises.
28179 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28180 enforce these sanity checks. */
28182 static inline bool
28183 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28184 enum machine_mode mode, int strict)
28186 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28187 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28188 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28189 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28190 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28191 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28193 gcc_assert (!strict);
28194 return true;
28197 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28198 return true;
28200 /* ??? This is a lie. We do have moves between mmx/general, and for
28201 mmx/sse2. But by saying we need secondary memory we discourage the
28202 register allocator from using the mmx registers unless needed. */
28203 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28204 return true;
28206 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28208 /* SSE1 doesn't have any direct moves from other classes. */
28209 if (!TARGET_SSE2)
28210 return true;
28212 /* If the target says that inter-unit moves are more expensive
28213 than moving through memory, then don't generate them. */
28214 if (!TARGET_INTER_UNIT_MOVES)
28215 return true;
28217 /* Between SSE and general, we have moves no larger than word size. */
28218 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28219 return true;
28222 return false;
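/* Worked example (illustrative): copying a DFmode value between GENERAL_REGS
   and SSE_REGS reports secondary memory on 32-bit targets, because
   GET_MODE_SIZE (DFmode) == 8 exceeds UNITS_PER_WORD == 4; it also does on
   64-bit whenever the selected tuning turns TARGET_INTER_UNIT_MOVES off.  An
   SFmode copy between the same classes on SSE2 hardware with inter-unit
   moves enabled needs no secondary memory.  */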
28225 bool
28226 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28227 enum machine_mode mode, int strict)
28229 return inline_secondary_memory_needed (class1, class2, mode, strict);
28232 /* Return true if the registers in CLASS cannot represent the change from
28233 modes FROM to TO. */
28235 bool
28236 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28237 enum reg_class regclass)
28239 if (from == to)
28240 return false;
28242 /* x87 registers can't do subreg at all, as all values are reformatted
28243 to extended precision. */
28244 if (MAYBE_FLOAT_CLASS_P (regclass))
28245 return true;
28247 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28249 /* Vector registers do not support QI or HImode loads. If we don't
28250 disallow a change to these modes, reload will assume it's ok to
28251 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28252 the vec_dupv4hi pattern. */
28253 if (GET_MODE_SIZE (from) < 4)
28254 return true;
28256 /* Vector registers do not support subreg with nonzero offsets, which
28257 are otherwise valid for integer registers. Since we can't see
28258 whether we have a nonzero offset from here, prohibit all
28259 nonparadoxical subregs changing size. */
28260 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28261 return true;
28264 return false;
28267 /* Return the cost of moving data of mode M between a
28268 register and memory. A value of 2 is the default; this cost is
28269 relative to those in `REGISTER_MOVE_COST'.
28271 This function is used extensively by register_move_cost, which is used to
28272 build tables at startup, so make it inline in this case.
28273 When IN is 2, return the maximum of the in and out move costs.
28275 If moving between registers and memory is more expensive than
28276 between two registers, you should define this macro to express the
28277 relative cost.
28279 Also model the increased cost of moving QImode registers in non
28280 Q_REGS classes.
28282 static inline int
28283 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28284 int in)
28286 int cost;
28287 if (FLOAT_CLASS_P (regclass))
28289 int index;
28290 switch (mode)
28292 case SFmode:
28293 index = 0;
28294 break;
28295 case DFmode:
28296 index = 1;
28297 break;
28298 case XFmode:
28299 index = 2;
28300 break;
28301 default:
28302 return 100;
28304 if (in == 2)
28305 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28306 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28308 if (SSE_CLASS_P (regclass))
28310 int index;
28311 switch (GET_MODE_SIZE (mode))
28313 case 4:
28314 index = 0;
28315 break;
28316 case 8:
28317 index = 1;
28318 break;
28319 case 16:
28320 index = 2;
28321 break;
28322 default:
28323 return 100;
28325 if (in == 2)
28326 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28327 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28329 if (MMX_CLASS_P (regclass))
28331 int index;
28332 switch (GET_MODE_SIZE (mode))
28334 case 4:
28335 index = 0;
28336 break;
28337 case 8:
28338 index = 1;
28339 break;
28340 default:
28341 return 100;
28343 if (in == 2)
28344 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28345 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28347 switch (GET_MODE_SIZE (mode))
28349 case 1:
28350 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28352 if (!in)
28353 return ix86_cost->int_store[0];
28354 if (TARGET_PARTIAL_REG_DEPENDENCY
28355 && optimize_function_for_speed_p (cfun))
28356 cost = ix86_cost->movzbl_load;
28357 else
28358 cost = ix86_cost->int_load[0];
28359 if (in == 2)
28360 return MAX (cost, ix86_cost->int_store[0]);
28361 return cost;
28363 else
28365 if (in == 2)
28366 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28367 if (in)
28368 return ix86_cost->movzbl_load;
28369 else
28370 return ix86_cost->int_store[0] + 4;
28372 break;
28373 case 2:
28374 if (in == 2)
28375 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28376 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28377 default:
28378 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
28379 if (mode == TFmode)
28380 mode = XFmode;
28381 if (in == 2)
28382 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28383 else if (in)
28384 cost = ix86_cost->int_load[2];
28385 else
28386 cost = ix86_cost->int_store[2];
28387 return (cost * (((int) GET_MODE_SIZE (mode)
28388 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
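/* Worked example for the default (integer) case above, using the generic
   cost tables only for illustration: storing a DImode value from a general
   register on a 32-bit target costs ix86_cost->int_store[2] * ((8 + 4 - 1)
   / 4), i.e. two word-sized stores; a TFmode value is first treated as
   XFmode, so with the default 96-bit long double layout it is costed as
   (12 + 3) / 4 = 3 word-sized moves.  */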
28392 static int
28393 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28394 bool in)
28396 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28400 /* Return the cost of moving data from a register in class CLASS1 to
28401 one in class CLASS2.
28403 It is not required that the cost always equal 2 when FROM is the same as TO;
28404 on some machines it is expensive to move between registers if they are not
28405 general registers. */
28407 static int
28408 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28409 reg_class_t class2_i)
28411 enum reg_class class1 = (enum reg_class) class1_i;
28412 enum reg_class class2 = (enum reg_class) class2_i;
28414 /* In case we require secondary memory, compute cost of the store followed
28415 by load. In order to avoid bad register allocation choices, we need
28416 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28418 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28420 int cost = 1;
28422 cost += inline_memory_move_cost (mode, class1, 2);
28423 cost += inline_memory_move_cost (mode, class2, 2);
28425 /* In case of copying from general_purpose_register we may emit multiple
28426 stores followed by single load causing memory size mismatch stall.
28427 Count this as arbitrarily high cost of 20. */
28428 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
28429 cost += 20;
28431 /* In the case of FP/MMX moves, the registers actually overlap, and we
28432 have to switch modes in order to treat them differently. */
28433 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28434 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28435 cost += 20;
28437 return cost;
28440 /* Moves between SSE/MMX and integer unit are expensive. */
28441 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28442 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28444 /* ??? By keeping returned value relatively high, we limit the number
28445 of moves between integer and MMX/SSE registers for all targets.
28446 Additionally, high value prevents problem with x86_modes_tieable_p(),
28447 where integer modes in MMX/SSE registers are not tieable
28448 because of missing QImode and HImode moves to, from or between
28449 MMX/SSE registers. */
28450 return MAX (8, ix86_cost->mmxsse_to_integer);
28452 if (MAYBE_FLOAT_CLASS_P (class1))
28453 return ix86_cost->fp_move;
28454 if (MAYBE_SSE_CLASS_P (class1))
28455 return ix86_cost->sse_move;
28456 if (MAYBE_MMX_CLASS_P (class1))
28457 return ix86_cost->mmx_move;
28458 return 2;
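/* Worked example (illustrative): for a DFmode copy from GENERAL_REGS to
   SSE_REGS that needs secondary memory, the cost is

     1 + inline_memory_move_cost (DFmode, GENERAL_REGS, 2)
       + inline_memory_move_cost (DFmode, SSE_REGS, 2)

   plus 20 when GENERAL_REGS needs more hard registers for DFmode than
   SSE_REGS does (two word-sized stores feeding one 8-byte load), so the
   allocator strongly prefers keeping such values within one unit.  */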
28461 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
28463 bool
28464 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28466 /* Flags and only flags can only hold CCmode values. */
28467 if (CC_REGNO_P (regno))
28468 return GET_MODE_CLASS (mode) == MODE_CC;
28469 if (GET_MODE_CLASS (mode) == MODE_CC
28470 || GET_MODE_CLASS (mode) == MODE_RANDOM
28471 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28472 return 0;
28473 if (FP_REGNO_P (regno))
28474 return VALID_FP_MODE_P (mode);
28475 if (SSE_REGNO_P (regno))
28477 /* We implement the move patterns for all vector modes into and
28478 out of SSE registers, even when no operation instructions
28479 are available. OImode move is available only when AVX is
28480 enabled. */
28481 return ((TARGET_AVX && mode == OImode)
28482 || VALID_AVX256_REG_MODE (mode)
28483 || VALID_SSE_REG_MODE (mode)
28484 || VALID_SSE2_REG_MODE (mode)
28485 || VALID_MMX_REG_MODE (mode)
28486 || VALID_MMX_REG_MODE_3DNOW (mode));
28488 if (MMX_REGNO_P (regno))
28490 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28491 so if the register is available at all, then we can move data of
28492 the given mode into or out of it. */
28493 return (VALID_MMX_REG_MODE (mode)
28494 || VALID_MMX_REG_MODE_3DNOW (mode));
28497 if (mode == QImode)
28499 /* Take care with QImode values - they can be in non-QI regs,
28500 but then they do cause partial register stalls. */
28501 if (regno <= BX_REG || TARGET_64BIT)
28502 return 1;
28503 if (!TARGET_PARTIAL_REG_STALL)
28504 return 1;
28505 return reload_in_progress || reload_completed;
28507 /* We handle both integer and floats in the general purpose registers. */
28508 else if (VALID_INT_MODE_P (mode))
28509 return 1;
28510 else if (VALID_FP_MODE_P (mode))
28511 return 1;
28512 else if (VALID_DFP_MODE_P (mode))
28513 return 1;
28514 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28515 on to use that value in smaller contexts, this can easily force a
28516 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28517 supporting DImode, allow it. */
28518 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
28519 return 1;
28521 return 0;
28524 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28525 tieable integer mode. */
28527 static bool
28528 ix86_tieable_integer_mode_p (enum machine_mode mode)
28530 switch (mode)
28532 case HImode:
28533 case SImode:
28534 return true;
28536 case QImode:
28537 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
28539 case DImode:
28540 return TARGET_64BIT;
28542 default:
28543 return false;
28547 /* Return true if MODE1 is accessible in a register that can hold MODE2
28548 without copying. That is, all register classes that can hold MODE2
28549 can also hold MODE1. */
28551 bool
28552 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
28554 if (mode1 == mode2)
28555 return true;
28557 if (ix86_tieable_integer_mode_p (mode1)
28558 && ix86_tieable_integer_mode_p (mode2))
28559 return true;
28561 /* MODE2 being XFmode implies fp stack or general regs, which means we
28562 can tie any smaller floating point modes to it. Note that we do not
28563 tie this with TFmode. */
28564 if (mode2 == XFmode)
28565 return mode1 == SFmode || mode1 == DFmode;
28567 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28568 that we can tie it with SFmode. */
28569 if (mode2 == DFmode)
28570 return mode1 == SFmode;
28572 /* If MODE2 is only appropriate for an SSE register, then tie with
28573 any other mode acceptable to SSE registers. */
28574 if (GET_MODE_SIZE (mode2) == 16
28575 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
28576 return (GET_MODE_SIZE (mode1) == 16
28577 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
28579 /* If MODE2 is appropriate for an MMX register, then tie
28580 with any other mode acceptable to MMX registers. */
28581 if (GET_MODE_SIZE (mode2) == 8
28582 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
28583 return (GET_MODE_SIZE (mode1) == 8
28584 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
28586 return false;
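/* Illustrative consequences of the rules above: SImode and HImode always
   tie; DImode ties with them only on 64-bit targets; an SFmode MODE1 ties
   with a DFmode or XFmode MODE2, since every register class that can hold
   the wider FP mode can also hold SFmode; and two 16-byte modes such as
   V4SFmode and V2DImode tie because both are acceptable only to SSE
   registers.  */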
28589 /* Compute a (partial) cost for rtx X. Return true if the complete
28590 cost has been computed, and false if subexpressions should be
28591 scanned. In either case, *TOTAL contains the cost result. */
28593 static bool
28594 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
28596 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
28597 enum machine_mode mode = GET_MODE (x);
28598 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
28600 switch (code)
28602 case CONST_INT:
28603 case CONST:
28604 case LABEL_REF:
28605 case SYMBOL_REF:
28606 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
28607 *total = 3;
28608 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
28609 *total = 2;
28610 else if (flag_pic && SYMBOLIC_CONST (x)
28611 && (!TARGET_64BIT
28612 || (GET_CODE (x) != LABEL_REF
28613 && (GET_CODE (x) != SYMBOL_REF
28614 || !SYMBOL_REF_LOCAL_P (x)))))
28615 *total = 1;
28616 else
28617 *total = 0;
28618 return true;
28620 case CONST_DOUBLE:
28621 if (mode == VOIDmode)
28622 *total = 0;
28623 else
28624 switch (standard_80387_constant_p (x))
28626 case 1: /* 0.0 */
28627 *total = 1;
28628 break;
28629 default: /* Other constants */
28630 *total = 2;
28631 break;
28632 case 0:
28633 case -1:
28634 /* Start with (MEM (SYMBOL_REF)), since that's where
28635 it'll probably end up. Add a penalty for size. */
28636 *total = (COSTS_N_INSNS (1)
28637 + (flag_pic != 0 && !TARGET_64BIT)
28638 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
28639 break;
28641 return true;
28643 case ZERO_EXTEND:
28644 /* The zero extension is often completely free on x86_64, so make
28645 it as cheap as possible. */
28646 if (TARGET_64BIT && mode == DImode
28647 && GET_MODE (XEXP (x, 0)) == SImode)
28648 *total = 1;
28649 else if (TARGET_ZERO_EXTEND_WITH_AND)
28650 *total = cost->add;
28651 else
28652 *total = cost->movzx;
28653 return false;
28655 case SIGN_EXTEND:
28656 *total = cost->movsx;
28657 return false;
28659 case ASHIFT:
28660 if (CONST_INT_P (XEXP (x, 1))
28661 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
28663 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28664 if (value == 1)
28666 *total = cost->add;
28667 return false;
28669 if ((value == 2 || value == 3)
28670 && cost->lea <= cost->shift_const)
28672 *total = cost->lea;
28673 return false;
28676 /* FALLTHRU */
28678 case ROTATE:
28679 case ASHIFTRT:
28680 case LSHIFTRT:
28681 case ROTATERT:
28682 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
28684 if (CONST_INT_P (XEXP (x, 1)))
28686 if (INTVAL (XEXP (x, 1)) > 32)
28687 *total = cost->shift_const + COSTS_N_INSNS (2);
28688 else
28689 *total = cost->shift_const * 2;
28691 else
28693 if (GET_CODE (XEXP (x, 1)) == AND)
28694 *total = cost->shift_var * 2;
28695 else
28696 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
28699 else
28701 if (CONST_INT_P (XEXP (x, 1)))
28702 *total = cost->shift_const;
28703 else
28704 *total = cost->shift_var;
28706 return false;
28708 case FMA:
28710 rtx sub;
28712 gcc_assert (FLOAT_MODE_P (mode));
28713 gcc_assert (TARGET_FMA || TARGET_FMA4);
28715 /* ??? SSE scalar/vector cost should be used here. */
28716 /* ??? Bald assumption that fma has the same cost as fmul. */
28717 *total = cost->fmul;
28718 *total += rtx_cost (XEXP (x, 1), FMA, speed);
28720 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
28721 sub = XEXP (x, 0);
28722 if (GET_CODE (sub) == NEG)
28723 sub = XEXP (sub, 0);
28724 *total += rtx_cost (sub, FMA, speed);
28726 sub = XEXP (x, 2);
28727 if (GET_CODE (sub) == NEG)
28728 sub = XEXP (sub, 0);
28729 *total += rtx_cost (sub, FMA, speed);
28730 return true;
28733 case MULT:
28734 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28736 /* ??? SSE scalar cost should be used here. */
28737 *total = cost->fmul;
28738 return false;
28740 else if (X87_FLOAT_MODE_P (mode))
28742 *total = cost->fmul;
28743 return false;
28745 else if (FLOAT_MODE_P (mode))
28747 /* ??? SSE vector cost should be used here. */
28748 *total = cost->fmul;
28749 return false;
28751 else
28753 rtx op0 = XEXP (x, 0);
28754 rtx op1 = XEXP (x, 1);
28755 int nbits;
28756 if (CONST_INT_P (XEXP (x, 1)))
28758 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28759 for (nbits = 0; value != 0; value &= value - 1)
28760 nbits++;
28762 else
28763 /* This is arbitrary. */
28764 nbits = 7;
28766 /* Compute costs correctly for widening multiplication. */
28767 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
28768 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
28769 == GET_MODE_SIZE (mode))
28771 int is_mulwiden = 0;
28772 enum machine_mode inner_mode = GET_MODE (op0);
28774 if (GET_CODE (op0) == GET_CODE (op1))
28775 is_mulwiden = 1, op1 = XEXP (op1, 0);
28776 else if (CONST_INT_P (op1))
28778 if (GET_CODE (op0) == SIGN_EXTEND)
28779 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
28780 == INTVAL (op1);
28781 else
28782 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
28785 if (is_mulwiden)
28786 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
28789 *total = (cost->mult_init[MODE_INDEX (mode)]
28790 + nbits * cost->mult_bit
28791 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
28793 return true;
28796 case DIV:
28797 case UDIV:
28798 case MOD:
28799 case UMOD:
28800 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28801 /* ??? SSE cost should be used here. */
28802 *total = cost->fdiv;
28803 else if (X87_FLOAT_MODE_P (mode))
28804 *total = cost->fdiv;
28805 else if (FLOAT_MODE_P (mode))
28806 /* ??? SSE vector cost should be used here. */
28807 *total = cost->fdiv;
28808 else
28809 *total = cost->divide[MODE_INDEX (mode)];
28810 return false;
28812 case PLUS:
28813 if (GET_MODE_CLASS (mode) == MODE_INT
28814 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
28816 if (GET_CODE (XEXP (x, 0)) == PLUS
28817 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
28818 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
28819 && CONSTANT_P (XEXP (x, 1)))
28821 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
28822 if (val == 2 || val == 4 || val == 8)
28824 *total = cost->lea;
28825 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28826 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
28827 outer_code, speed);
28828 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28829 return true;
28832 else if (GET_CODE (XEXP (x, 0)) == MULT
28833 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
28835 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
28836 if (val == 2 || val == 4 || val == 8)
28838 *total = cost->lea;
28839 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28840 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28841 return true;
28844 else if (GET_CODE (XEXP (x, 0)) == PLUS)
28846 *total = cost->lea;
28847 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28848 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28849 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28850 return true;
28853 /* FALLTHRU */
28855 case MINUS:
28856 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28858 /* ??? SSE cost should be used here. */
28859 *total = cost->fadd;
28860 return false;
28862 else if (X87_FLOAT_MODE_P (mode))
28864 *total = cost->fadd;
28865 return false;
28867 else if (FLOAT_MODE_P (mode))
28869 /* ??? SSE vector cost should be used here. */
28870 *total = cost->fadd;
28871 return false;
28873 /* FALLTHRU */
28875 case AND:
28876 case IOR:
28877 case XOR:
28878 if (!TARGET_64BIT && mode == DImode)
28880 *total = (cost->add * 2
28881 + (rtx_cost (XEXP (x, 0), outer_code, speed)
28882 << (GET_MODE (XEXP (x, 0)) != DImode))
28883 + (rtx_cost (XEXP (x, 1), outer_code, speed)
28884 << (GET_MODE (XEXP (x, 1)) != DImode)));
28885 return true;
28887 /* FALLTHRU */
28889 case NEG:
28890 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28892 /* ??? SSE cost should be used here. */
28893 *total = cost->fchs;
28894 return false;
28896 else if (X87_FLOAT_MODE_P (mode))
28898 *total = cost->fchs;
28899 return false;
28901 else if (FLOAT_MODE_P (mode))
28903 /* ??? SSE vector cost should be used here. */
28904 *total = cost->fchs;
28905 return false;
28907 /* FALLTHRU */
28909 case NOT:
28910 if (!TARGET_64BIT && mode == DImode)
28911 *total = cost->add * 2;
28912 else
28913 *total = cost->add;
28914 return false;
28916 case COMPARE:
28917 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
28918 && XEXP (XEXP (x, 0), 1) == const1_rtx
28919 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
28920 && XEXP (x, 1) == const0_rtx)
28922 /* This kind of construct is implemented using test[bwl].
28923 Treat it as if we had an AND. */
28924 *total = (cost->add
28925 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
28926 + rtx_cost (const1_rtx, outer_code, speed));
28927 return true;
28929 return false;
28931 case FLOAT_EXTEND:
28932 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
28933 *total = 0;
28934 return false;
28936 case ABS:
28937 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28938 /* ??? SSE cost should be used here. */
28939 *total = cost->fabs;
28940 else if (X87_FLOAT_MODE_P (mode))
28941 *total = cost->fabs;
28942 else if (FLOAT_MODE_P (mode))
28943 /* ??? SSE vector cost should be used here. */
28944 *total = cost->fabs;
28945 return false;
28947 case SQRT:
28948 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28949 /* ??? SSE cost should be used here. */
28950 *total = cost->fsqrt;
28951 else if (X87_FLOAT_MODE_P (mode))
28952 *total = cost->fsqrt;
28953 else if (FLOAT_MODE_P (mode))
28954 /* ??? SSE vector cost should be used here. */
28955 *total = cost->fsqrt;
28956 return false;
28958 case UNSPEC:
28959 if (XINT (x, 1) == UNSPEC_TP)
28960 *total = 0;
28961 return false;
28963 case VEC_SELECT:
28964 case VEC_CONCAT:
28965 case VEC_MERGE:
28966 case VEC_DUPLICATE:
28967 /* ??? Assume all of these vector manipulation patterns are
28968 recognizable. In which case they all pretty much have the
28969 same cost. */
28970 *total = COSTS_N_INSNS (1);
28971 return true;
28973 default:
28974 return false;
28978 #if TARGET_MACHO
28980 static int current_machopic_label_num;
28982 /* Given a symbol name and its associated stub, write out the
28983 definition of the stub. */
28985 void
28986 machopic_output_stub (FILE *file, const char *symb, const char *stub)
28988 unsigned int length;
28989 char *binder_name, *symbol_name, lazy_ptr_name[32];
28990 int label = ++current_machopic_label_num;
28992 /* For 64-bit we shouldn't get here. */
28993 gcc_assert (!TARGET_64BIT);
28995 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
28996 symb = targetm.strip_name_encoding (symb);
28998 length = strlen (stub);
28999 binder_name = XALLOCAVEC (char, length + 32);
29000 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29002 length = strlen (symb);
29003 symbol_name = XALLOCAVEC (char, length + 32);
29004 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29006 sprintf (lazy_ptr_name, "L%d$lz", label);
29008 if (MACHOPIC_ATT_STUB)
29009 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29010 else if (MACHOPIC_PURE)
29012 if (TARGET_DEEP_BRANCH_PREDICTION)
29013 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29014 else
29015 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
29017 else
29018 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29020 fprintf (file, "%s:\n", stub);
29021 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29023 if (MACHOPIC_ATT_STUB)
29025 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29027 else if (MACHOPIC_PURE)
29029 /* PIC stub. */
29030 if (TARGET_DEEP_BRANCH_PREDICTION)
29032 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29033 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29034 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29035 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
29037 else
29039 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
29040 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
29041 fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
29043 fprintf (file, "\tjmp\t*%%ecx\n");
29045 else
29046 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29048 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29049 it needs no stub-binding-helper. */
29050 if (MACHOPIC_ATT_STUB)
29051 return;
29053 fprintf (file, "%s:\n", binder_name);
29055 if (MACHOPIC_PURE)
29057 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29058 fprintf (file, "\tpushl\t%%ecx\n");
29060 else
29061 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29063 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29065 /* N.B. Keep the correspondence of these
29066 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29067 old-pic/new-pic/non-pic stubs; altering this will break
29068 compatibility with existing dylibs. */
29069 if (MACHOPIC_PURE)
29071 /* PIC stubs. */
29072 if (TARGET_DEEP_BRANCH_PREDICTION)
29073 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29074 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29075 else
29076 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */
29077 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29079 else
29080 /* 16-byte -mdynamic-no-pic stub. */
29081 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
29083 fprintf (file, "%s:\n", lazy_ptr_name);
29084 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29085 fprintf (file, ASM_LONG "%s\n", binder_name);
29087 #endif /* TARGET_MACHO */
29089 /* Order the registers for register allocator. */
29091 void
29092 x86_order_regs_for_local_alloc (void)
29094 int pos = 0;
29095 int i;
29097 /* First allocate the local general purpose registers. */
29098 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29099 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29100 reg_alloc_order [pos++] = i;
29102 /* Global general purpose registers. */
29103 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29104 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29105 reg_alloc_order [pos++] = i;
29107 /* x87 registers come first in case we are doing FP math
29108 using them. */
29109 if (!TARGET_SSE_MATH)
29110 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29111 reg_alloc_order [pos++] = i;
29113 /* SSE registers. */
29114 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29115 reg_alloc_order [pos++] = i;
29116 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29117 reg_alloc_order [pos++] = i;
29119 /* x87 registers. */
29120 if (TARGET_SSE_MATH)
29121 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29122 reg_alloc_order [pos++] = i;
29124 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29125 reg_alloc_order [pos++] = i;
29127 /* Initialize the rest of the array, as we do not allocate some registers
29128 at all. */
29129 while (pos < FIRST_PSEUDO_REGISTER)
29130 reg_alloc_order [pos++] = 0;
29133 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29134 in struct attribute_spec handler. */
29135 static tree
29136 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29137 tree args,
29138 int flags ATTRIBUTE_UNUSED,
29139 bool *no_add_attrs)
29141 if (TREE_CODE (*node) != FUNCTION_TYPE
29142 && TREE_CODE (*node) != METHOD_TYPE
29143 && TREE_CODE (*node) != FIELD_DECL
29144 && TREE_CODE (*node) != TYPE_DECL)
29146 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29147 name);
29148 *no_add_attrs = true;
29149 return NULL_TREE;
29151 if (TARGET_64BIT)
29153 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29154 name);
29155 *no_add_attrs = true;
29156 return NULL_TREE;
29158 if (is_attribute_p ("callee_pop_aggregate_return", name))
29160 tree cst;
29162 cst = TREE_VALUE (args);
29163 if (TREE_CODE (cst) != INTEGER_CST)
29165 warning (OPT_Wattributes,
29166 "%qE attribute requires an integer constant argument",
29167 name);
29168 *no_add_attrs = true;
29170 else if (compare_tree_int (cst, 0) != 0
29171 && compare_tree_int (cst, 1) != 0)
29173 warning (OPT_Wattributes,
29174 "argument to %qE attribute is neither zero, nor one",
29175 name);
29176 *no_add_attrs = true;
29179 return NULL_TREE;
29182 return NULL_TREE;
29185 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
29186 struct attribute_spec.handler. */
29187 static tree
29188 ix86_handle_abi_attribute (tree *node, tree name,
29189 tree args ATTRIBUTE_UNUSED,
29190 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29192 if (TREE_CODE (*node) != FUNCTION_TYPE
29193 && TREE_CODE (*node) != METHOD_TYPE
29194 && TREE_CODE (*node) != FIELD_DECL
29195 && TREE_CODE (*node) != TYPE_DECL)
29197 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29198 name);
29199 *no_add_attrs = true;
29200 return NULL_TREE;
29202 if (!TARGET_64BIT)
29204 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
29205 name);
29206 *no_add_attrs = true;
29207 return NULL_TREE;
29210 /* Can combine regparm with all attributes but fastcall. */
29211 if (is_attribute_p ("ms_abi", name))
29213 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29215 error ("ms_abi and sysv_abi attributes are not compatible");
29218 return NULL_TREE;
29220 else if (is_attribute_p ("sysv_abi", name))
29222 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29224 error ("ms_abi and sysv_abi attributes are not compatible");
29227 return NULL_TREE;
29230 return NULL_TREE;
29233 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29234 struct attribute_spec.handler. */
29235 static tree
29236 ix86_handle_struct_attribute (tree *node, tree name,
29237 tree args ATTRIBUTE_UNUSED,
29238 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29240 tree *type = NULL;
29241 if (DECL_P (*node))
29243 if (TREE_CODE (*node) == TYPE_DECL)
29244 type = &TREE_TYPE (*node);
29246 else
29247 type = node;
29249 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29250 || TREE_CODE (*type) == UNION_TYPE)))
29252 warning (OPT_Wattributes, "%qE attribute ignored",
29253 name);
29254 *no_add_attrs = true;
29257 else if ((is_attribute_p ("ms_struct", name)
29258 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29259 || ((is_attribute_p ("gcc_struct", name)
29260 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29262 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29263 name);
29264 *no_add_attrs = true;
29267 return NULL_TREE;
29270 static tree
29271 ix86_handle_fndecl_attribute (tree *node, tree name,
29272 tree args ATTRIBUTE_UNUSED,
29273 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29275 if (TREE_CODE (*node) != FUNCTION_DECL)
29277 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29278 name);
29279 *no_add_attrs = true;
29281 return NULL_TREE;
29284 static bool
29285 ix86_ms_bitfield_layout_p (const_tree record_type)
29287 return ((TARGET_MS_BITFIELD_LAYOUT
29288 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29289 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29292 /* Returns an expression indicating where the this parameter is
29293 located on entry to the FUNCTION. */
29295 static rtx
29296 x86_this_parameter (tree function)
29298 tree type = TREE_TYPE (function);
29299 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29300 int nregs;
29302 if (TARGET_64BIT)
29304 const int *parm_regs;
29306 if (ix86_function_type_abi (type) == MS_ABI)
29307 parm_regs = x86_64_ms_abi_int_parameter_registers;
29308 else
29309 parm_regs = x86_64_int_parameter_registers;
29310 return gen_rtx_REG (DImode, parm_regs[aggr]);
29313 nregs = ix86_function_regparm (type, function);
29315 if (nregs > 0 && !stdarg_p (type))
29317 int regno;
29319 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
29320 regno = aggr ? DX_REG : CX_REG;
29321 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
29323 regno = CX_REG;
29324 if (aggr)
29325 return gen_rtx_MEM (SImode,
29326 plus_constant (stack_pointer_rtx, 4));
29328 else
29330 regno = AX_REG;
29331 if (aggr)
29333 regno = DX_REG;
29334 if (nregs == 1)
29335 return gen_rtx_MEM (SImode,
29336 plus_constant (stack_pointer_rtx, 4));
29339 return gen_rtx_REG (SImode, regno);
29342 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
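/* Examples of the returned location (illustrative): on 64-bit targets
   "this" arrives in %rdi for the SysV ABI (or %rcx for the MS ABI),
   shifting to the second parameter register when the return value is an
   aggregate returned via a hidden pointer; on 32-bit, a fastcall method
   finds "this" in %ecx (%edx in the aggregate case), while the plain
   stack-based convention reads it from 4(%esp), or 8(%esp) when the hidden
   aggregate-return pointer occupies the first slot.  */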
29345 /* Determine whether x86_output_mi_thunk can succeed. */
29347 static bool
29348 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29349 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29350 HOST_WIDE_INT vcall_offset, const_tree function)
29352 /* 64-bit can handle anything. */
29353 if (TARGET_64BIT)
29354 return true;
29356 /* For 32-bit, everything's fine if we have one free register. */
29357 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29358 return true;
29360 /* Need a free register for vcall_offset. */
29361 if (vcall_offset)
29362 return false;
29364 /* Need a free register for GOT references. */
29365 if (flag_pic && !targetm.binds_local_p (function))
29366 return false;
29368 /* Otherwise ok. */
29369 return true;
29372 /* Output the assembler code for a thunk function. THUNK_DECL is the
29373 declaration for the thunk function itself, FUNCTION is the decl for
29374 the target function. DELTA is an immediate constant offset to be
29375 added to THIS. If VCALL_OFFSET is nonzero, the word at
29376 *(*this + vcall_offset) should be added to THIS. */
29378 static void
29379 x86_output_mi_thunk (FILE *file,
29380 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29381 HOST_WIDE_INT vcall_offset, tree function)
29383 rtx xops[3];
29384 rtx this_param = x86_this_parameter (function);
29385 rtx this_reg, tmp;
29387 /* Make sure unwind info is emitted for the thunk if needed. */
29388 final_start_function (emit_barrier (), file, 1);
29390 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29391 pull it in now and let DELTA benefit. */
29392 if (REG_P (this_param))
29393 this_reg = this_param;
29394 else if (vcall_offset)
29396 /* Put the this parameter into %eax. */
29397 xops[0] = this_param;
29398 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
29399 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29401 else
29402 this_reg = NULL_RTX;
29404 /* Adjust the this parameter by a fixed constant. */
29405 if (delta)
29407 xops[0] = GEN_INT (delta);
29408 xops[1] = this_reg ? this_reg : this_param;
29409 if (TARGET_64BIT)
29411 if (!x86_64_general_operand (xops[0], DImode))
29413 tmp = gen_rtx_REG (DImode, R10_REG);
29414 xops[1] = tmp;
29415 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
29416 xops[0] = tmp;
29417 xops[1] = this_param;
29419 if (x86_maybe_negate_const_int (&xops[0], DImode))
29420 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
29421 else
29422 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
29424 else if (x86_maybe_negate_const_int (&xops[0], SImode))
29425 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
29426 else
29427 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
29430 /* Adjust the this parameter by a value stored in the vtable. */
29431 if (vcall_offset)
29433 if (TARGET_64BIT)
29434 tmp = gen_rtx_REG (DImode, R10_REG);
29435 else
29437 int tmp_regno = CX_REG;
29438 if (lookup_attribute ("fastcall",
29439 TYPE_ATTRIBUTES (TREE_TYPE (function)))
29440 || lookup_attribute ("thiscall",
29441 TYPE_ATTRIBUTES (TREE_TYPE (function))))
29442 tmp_regno = AX_REG;
29443 tmp = gen_rtx_REG (SImode, tmp_regno);
29446 xops[0] = gen_rtx_MEM (Pmode, this_reg);
29447 xops[1] = tmp;
29448 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29450 /* Adjust the this parameter. */
29451 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
29452 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
29454 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
29455 xops[0] = GEN_INT (vcall_offset);
29456 xops[1] = tmp2;
29457 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
29458 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
29460 xops[1] = this_reg;
29461 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
29464 /* If necessary, drop THIS back to its stack slot. */
29465 if (this_reg && this_reg != this_param)
29467 xops[0] = this_reg;
29468 xops[1] = this_param;
29469 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29472 xops[0] = XEXP (DECL_RTL (function), 0);
29473 if (TARGET_64BIT)
29475 if (!flag_pic || targetm.binds_local_p (function)
29476 || DEFAULT_ABI == MS_ABI)
29477 output_asm_insn ("jmp\t%P0", xops);
29478 /* All thunks should be in the same object as their target,
29479 and thus binds_local_p should be true. */
29480 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
29481 gcc_unreachable ();
29482 else
29484 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
29485 tmp = gen_rtx_CONST (Pmode, tmp);
29486 tmp = gen_rtx_MEM (QImode, tmp);
29487 xops[0] = tmp;
29488 output_asm_insn ("jmp\t%A0", xops);
29491 else
29493 if (!flag_pic || targetm.binds_local_p (function))
29494 output_asm_insn ("jmp\t%P0", xops);
29495 else
29496 #if TARGET_MACHO
29497 if (TARGET_MACHO)
29499 rtx sym_ref = XEXP (DECL_RTL (function), 0);
29500 if (TARGET_MACHO_BRANCH_ISLANDS)
29501 sym_ref = (gen_rtx_SYMBOL_REF
29502 (Pmode,
29503 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
29504 tmp = gen_rtx_MEM (QImode, sym_ref);
29505 xops[0] = tmp;
29506 output_asm_insn ("jmp\t%0", xops);
29508 else
29509 #endif /* TARGET_MACHO */
29511 tmp = gen_rtx_REG (SImode, CX_REG);
29512 output_set_got (tmp, NULL_RTX);
29514 xops[1] = tmp;
29515 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
29516 output_asm_insn ("jmp\t{*}%1", xops);
29519 final_end_function ();
29522 static void
29523 x86_file_start (void)
29525 default_file_start ();
29526 #if TARGET_MACHO
29527 darwin_file_start ();
29528 #endif
29529 if (X86_FILE_START_VERSION_DIRECTIVE)
29530 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
29531 if (X86_FILE_START_FLTUSED)
29532 fputs ("\t.global\t__fltused\n", asm_out_file);
29533 if (ix86_asm_dialect == ASM_INTEL)
29534 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
29538 x86_field_alignment (tree field, int computed)
29540 enum machine_mode mode;
29541 tree type = TREE_TYPE (field);
29543 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
29544 return computed;
29545 mode = TYPE_MODE (strip_array_types (type));
29546 if (mode == DFmode || mode == DCmode
29547 || GET_MODE_CLASS (mode) == MODE_INT
29548 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
29549 return MIN (32, computed);
29550 return computed;
29553 /* Output assembler code to FILE to increment profiler label # LABELNO
29554 for profiling a function entry. */
29555 void
29556 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
29558 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
29559 : MCOUNT_NAME);
29561 if (TARGET_64BIT)
29563 #ifndef NO_PROFILE_COUNTERS
29564 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
29565 #endif
29567 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
29568 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
29569 else
29570 fprintf (file, "\tcall\t%s\n", mcount_name);
29572 else if (flag_pic)
29574 #ifndef NO_PROFILE_COUNTERS
29575 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
29576 LPREFIX, labelno);
29577 #endif
29578 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
29580 else
29582 #ifndef NO_PROFILE_COUNTERS
29583 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
29584 LPREFIX, labelno);
29585 #endif
29586 fprintf (file, "\tcall\t%s\n", mcount_name);
29590 /* We don't have exact information about the insn sizes, but we may assume
29591 quite safely that we are informed about all 1 byte insns and memory
29592 address sizes. This is enough to eliminate unnecessary padding in
29593 99% of cases. */
29595 static int
29596 min_insn_size (rtx insn)
29598 int l = 0, len;
29600 if (!INSN_P (insn) || !active_insn_p (insn))
29601 return 0;
29603 /* Discard alignments we've emitted and jump table data. */
29604 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
29605 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
29606 return 0;
29607 if (JUMP_TABLE_DATA_P (insn))
29608 return 0;
29610 /* Important case - calls are always 5 bytes.
29611 It is common to have many calls in a row. */
29612 if (CALL_P (insn)
29613 && symbolic_reference_mentioned_p (PATTERN (insn))
29614 && !SIBLING_CALL_P (insn))
29615 return 5;
29616 len = get_attr_length (insn);
29617 if (len <= 1)
29618 return 1;
29620 /* For normal instructions we rely on get_attr_length being exact,
29621 with a few exceptions. */
29622 if (!JUMP_P (insn))
29624 enum attr_type type = get_attr_type (insn);
29626 switch (type)
29628 case TYPE_MULTI:
29629 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
29630 || asm_noperands (PATTERN (insn)) >= 0)
29631 return 0;
29632 break;
29633 case TYPE_OTHER:
29634 case TYPE_FCMP:
29635 break;
29636 default:
29637 /* Otherwise trust get_attr_length. */
29638 return len;
29641 l = get_attr_length_address (insn);
29642 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
29643 l = 4;
29645 if (l)
29646 return 1+l;
29647 else
29648 return 2;
29651 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29653 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
29654 16 byte window. */
29656 static void
29657 ix86_avoid_jump_mispredicts (void)
29659 rtx insn, start = get_insns ();
29660 int nbytes = 0, njumps = 0;
29661 int isjump = 0;
29663 /* Look for all minimal intervals of instructions containing 4 jumps.
29664 The intervals are bounded by START and INSN. NBYTES is the total
29665 size of instructions in the interval including INSN and not including
29666 START. When NBYTES is smaller than 16 bytes, it is possible
29667 that the ends of START and INSN land in the same 16 byte page.
29669 The smallest offset in the page at which INSN can start occurs when START
29670 ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
29671 We add a p2align to the 16 byte window with maxskip 15 - NBYTES + sizeof (INSN).
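/* Worked instance of that formula (illustrative): if the interval holding
   the four jumps has NBYTES == 12 and INSN itself is 2 bytes, the pad
   emitted below is 15 - 12 + 2 = 5 bytes, just enough to push INSN past the
   16 byte boundary in the worst-case placement.  */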
29673 for (insn = start; insn; insn = NEXT_INSN (insn))
29675 int min_size;
29677 if (LABEL_P (insn))
29679 int align = label_to_alignment (insn);
29680 int max_skip = label_to_max_skip (insn);
29682 if (max_skip > 15)
29683 max_skip = 15;
29684 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
29685 already in the current 16 byte page, because otherwise
29686 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
29687 bytes to reach 16 byte boundary. */
29688 if (align <= 0
29689 || (align <= 3 && max_skip != (1 << align) - 1))
29690 max_skip = 0;
29691 if (dump_file)
29692 fprintf (dump_file, "Label %i with max_skip %i\n",
29693 INSN_UID (insn), max_skip);
29694 if (max_skip)
29696 while (nbytes + max_skip >= 16)
29698 start = NEXT_INSN (start);
29699 if ((JUMP_P (start)
29700 && GET_CODE (PATTERN (start)) != ADDR_VEC
29701 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29702 || CALL_P (start))
29703 njumps--, isjump = 1;
29704 else
29705 isjump = 0;
29706 nbytes -= min_insn_size (start);
29709 continue;
29712 min_size = min_insn_size (insn);
29713 nbytes += min_size;
29714 if (dump_file)
29715 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
29716 INSN_UID (insn), min_size);
29717 if ((JUMP_P (insn)
29718 && GET_CODE (PATTERN (insn)) != ADDR_VEC
29719 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
29720 || CALL_P (insn))
29721 njumps++;
29722 else
29723 continue;
29725 while (njumps > 3)
29727 start = NEXT_INSN (start);
29728 if ((JUMP_P (start)
29729 && GET_CODE (PATTERN (start)) != ADDR_VEC
29730 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29731 || CALL_P (start))
29732 njumps--, isjump = 1;
29733 else
29734 isjump = 0;
29735 nbytes -= min_insn_size (start);
29737 gcc_assert (njumps >= 0);
29738 if (dump_file)
29739 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
29740 INSN_UID (start), INSN_UID (insn), nbytes);
29742 if (njumps == 3 && isjump && nbytes < 16)
29744 int padsize = 15 - nbytes + min_insn_size (insn);
29746 if (dump_file)
29747 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
29748 INSN_UID (insn), padsize);
29749 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
29753 #endif
29755 /* AMD Athlon works faster
29756 when RET is not the destination of a conditional jump or directly preceded
29757 by another jump instruction. We avoid the penalty by inserting a NOP just
29758 before the RET instruction in such cases. */
29759 static void
29760 ix86_pad_returns (void)
29762 edge e;
29763 edge_iterator ei;
29765 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29767 basic_block bb = e->src;
29768 rtx ret = BB_END (bb);
29769 rtx prev;
29770 bool replace = false;
29772 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
29773 || optimize_bb_for_size_p (bb))
29774 continue;
29775 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
29776 if (active_insn_p (prev) || LABEL_P (prev))
29777 break;
29778 if (prev && LABEL_P (prev))
29780 edge e;
29781 edge_iterator ei;
29783 FOR_EACH_EDGE (e, ei, bb->preds)
29784 if (EDGE_FREQUENCY (e) && e->src->index >= 0
29785 && !(e->flags & EDGE_FALLTHRU))
29786 replace = true;
29788 if (!replace)
29790 prev = prev_active_insn (ret);
29791 if (prev
29792 && ((JUMP_P (prev) && any_condjump_p (prev))
29793 || CALL_P (prev)))
29794 replace = true;
29795 /* Empty functions get branch mispredict even when
29796 the jump destination is not visible to us. */
29797 if (!prev && !optimize_function_for_size_p (cfun))
29798 replace = true;
29800 if (replace)
29802 emit_jump_insn_before (gen_return_internal_long (), ret);
29803 delete_insn (ret);
29808 /* Count the minimum number of instructions in BB. Return 4 if the
29809 number of instructions >= 4. */
29811 static int
29812 ix86_count_insn_bb (basic_block bb)
29814 rtx insn;
29815 int insn_count = 0;
29817 /* Count number of instructions in this block. Return 4 if the number
29818 of instructions >= 4. */
29819 FOR_BB_INSNS (bb, insn)
29821 /* This only happens in exit blocks. */
29822 if (JUMP_P (insn)
29823 && GET_CODE (PATTERN (insn)) == RETURN)
29824 break;
29826 if (NONDEBUG_INSN_P (insn)
29827 && GET_CODE (PATTERN (insn)) != USE
29828 && GET_CODE (PATTERN (insn)) != CLOBBER)
29830 insn_count++;
29831 if (insn_count >= 4)
29832 return insn_count;
29836 return insn_count;
29840 /* Count the minimum number of instructions in code path in BB.
29841 Return 4 if the number of instructions >= 4. */
29843 static int
29844 ix86_count_insn (basic_block bb)
29846 edge e;
29847 edge_iterator ei;
29848 int min_prev_count;
29850 /* Only bother counting instructions along paths with no
29851 more than 2 basic blocks between entry and exit. Given
29852 that BB has an edge to exit, determine if a predecessor
29853 of BB has an edge from entry. If so, compute the number
29854 of instructions in the predecessor block. If there
29855 happen to be multiple such blocks, compute the minimum. */
29856 min_prev_count = 4;
29857 FOR_EACH_EDGE (e, ei, bb->preds)
29859 edge prev_e;
29860 edge_iterator prev_ei;
29862 if (e->src == ENTRY_BLOCK_PTR)
29864 min_prev_count = 0;
29865 break;
29867 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
29869 if (prev_e->src == ENTRY_BLOCK_PTR)
29871 int count = ix86_count_insn_bb (e->src);
29872 if (count < min_prev_count)
29873 min_prev_count = count;
29874 break;
29879 if (min_prev_count < 4)
29880 min_prev_count += ix86_count_insn_bb (bb);
29882 return min_prev_count;
29885 /* Pad short function to 4 instructions. */
29887 static void
29888 ix86_pad_short_function (void)
29890 edge e;
29891 edge_iterator ei;
29893 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29895 rtx ret = BB_END (e->src);
29896 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
29898 int insn_count = ix86_count_insn (e->src);
29900 /* Pad short function. */
29901 if (insn_count < 4)
29903 rtx insn = ret;
29905 /* Find epilogue. */
29906 while (insn
29907 && (!NOTE_P (insn)
29908 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
29909 insn = PREV_INSN (insn);
29911 if (!insn)
29912 insn = ret;
29914 /* Two NOPs count as one instruction. */
29915 insn_count = 2 * (4 - insn_count);
29916 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
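/* Worked example (illustrative): a function whose body has a single real
   instruction before the return has insn_count == 1, so 2 * (4 - 1) == 6
   NOPs are emitted just before the epilogue note; since two NOPs are
   counted as one instruction, this brings the function up to the
   equivalent of four instructions.  */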
29922 /* Implement machine specific optimizations. We implement padding of returns
29923 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
29924 static void
29925 ix86_reorg (void)
29927 /* We are freeing block_for_insn in the toplev to keep compatibility
29928 with old MDEP_REORGS that are not CFG based. Recompute it now. */
29929 compute_bb_for_insn ();
29931 if (optimize && optimize_function_for_speed_p (cfun))
29933 if (TARGET_PAD_SHORT_FUNCTION)
29934 ix86_pad_short_function ();
29935 else if (TARGET_PAD_RETURNS)
29936 ix86_pad_returns ();
29937 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29938 if (TARGET_FOUR_JUMP_LIMIT)
29939 ix86_avoid_jump_mispredicts ();
29940 #endif
29943 /* Run the vzeroupper optimization if needed. */
29944 if (TARGET_VZEROUPPER)
29945 move_or_delete_vzeroupper ();
29948 /* Return nonzero when a QImode register that must be represented via a REX
29949 prefix is used. */
29950 bool
29951 x86_extended_QIreg_mentioned_p (rtx insn)
29953 int i;
29954 extract_insn_cached (insn);
29955 for (i = 0; i < recog_data.n_operands; i++)
29956 if (REG_P (recog_data.operand[i])
29957 && REGNO (recog_data.operand[i]) > BX_REG)
29958 return true;
29959 return false;
29962 /* Return nonzero when P points to register encoded via REX prefix.
29963 Called via for_each_rtx. */
29964 static int
29965 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
29967 unsigned int regno;
29968 if (!REG_P (*p))
29969 return 0;
29970 regno = REGNO (*p);
29971 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
29974 /* Return true when INSN mentions register that must be encoded using REX
29975 prefix. */
29976 bool
29977 x86_extended_reg_mentioned_p (rtx insn)
29979 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
29980 extended_reg_mentioned_1, NULL);
29983 /* If profitable, negate (without causing overflow) integer constant
29984 of mode MODE at location LOC. Return true in this case. */
29985 bool
29986 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
29988 HOST_WIDE_INT val;
29990 if (!CONST_INT_P (*loc))
29991 return false;
29993 switch (mode)
29995 case DImode:
29996 /* DImode x86_64 constants must fit in 32 bits. */
29997 gcc_assert (x86_64_immediate_operand (*loc, mode));
29999 mode = SImode;
30000 break;
30002 case SImode:
30003 case HImode:
30004 case QImode:
30005 break;
30007 default:
30008 gcc_unreachable ();
30011 /* Avoid overflows. */
30012 if (mode_signbit_p (mode, *loc))
30013 return false;
30015 val = INTVAL (*loc);
30017 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
30018 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
30019 if ((val < 0 && val != -128)
30020 || val == 128)
30022 *loc = GEN_INT (-val);
30023 return true;
30026 return false;
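/* Worked examples (illustrative): "addl $-4, %eax" is rewritten by the
   caller as "subl $4, %eax", because this routine negates the constant and
   returns true; "addl $128, %eax" likewise becomes "subl $-128, %eax",
   since -128 fits in a sign-extended 8-bit immediate while +128 does not;
   but "addl $-128, %eax" is left alone for the same reason.  */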
30029 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30030 optabs would emit if we didn't have TFmode patterns. */
30032 void
30033 x86_emit_floatuns (rtx operands[2])
30035 rtx neglab, donelab, i0, i1, f0, in, out;
30036 enum machine_mode mode, inmode;
30038 inmode = GET_MODE (operands[1]);
30039 gcc_assert (inmode == SImode || inmode == DImode);
30041 out = operands[0];
30042 in = force_reg (inmode, operands[1]);
30043 mode = GET_MODE (out);
30044 neglab = gen_label_rtx ();
30045 donelab = gen_label_rtx ();
30046 f0 = gen_reg_rtx (mode);
30048 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30050 expand_float (out, in, 0);
30052 emit_jump_insn (gen_jump (donelab));
30053 emit_barrier ();
30055 emit_label (neglab);
30057 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30058 1, OPTAB_DIRECT);
30059 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30060 1, OPTAB_DIRECT);
30061 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30063 expand_float (f0, i0, 0);
30065 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30067 emit_label (donelab);
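/* Sketch of the algorithm above (illustrative): for an unsigned input IN
   with the sign bit set, a direct signed conversion would be wrong, so the
   code computes I0 = (IN >> 1) | (IN & 1), converts I0 as a signed value,
   and doubles the result; OR-ing in the low bit keeps the rounding of the
   final FP result correct.  Inputs with the sign bit clear take the plain
   signed conversion path directly.  */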
30070 /* AVX does not support 32-byte integer vector operations,
30071 thus the longest vector we are faced with is V16QImode. */
30072 #define MAX_VECT_LEN 16
30074 struct expand_vec_perm_d
30076 rtx target, op0, op1;
30077 unsigned char perm[MAX_VECT_LEN];
30078 enum machine_mode vmode;
30079 unsigned char nelt;
30080 bool testing_p;
30083 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30084 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30086 /* Get a vector mode of the same size as the original but with elements
30087 twice as wide. This is only guaranteed to apply to integral vectors. */
30089 static inline enum machine_mode
30090 get_mode_wider_vector (enum machine_mode o)
30092 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30093 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30094 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30095 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30096 return n;
30099 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30100 with all elements equal to VAR. Return true if successful. */
30102 static bool
30103 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30104 rtx target, rtx val)
30106 bool ok;
30108 switch (mode)
30110 case V2SImode:
30111 case V2SFmode:
30112 if (!mmx_ok)
30113 return false;
30114 /* FALLTHRU */
30116 case V4DFmode:
30117 case V4DImode:
30118 case V8SFmode:
30119 case V8SImode:
30120 case V2DFmode:
30121 case V2DImode:
30122 case V4SFmode:
30123 case V4SImode:
30125 rtx insn, dup;
30127 /* First attempt to recognize VAL as-is. */
30128 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30129 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30130 if (recog_memoized (insn) < 0)
30132 rtx seq;
30133 /* If that fails, force VAL into a register. */
30135 start_sequence ();
30136 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30137 seq = get_insns ();
30138 end_sequence ();
30139 if (seq)
30140 emit_insn_before (seq, insn);
30142 ok = recog_memoized (insn) >= 0;
30143 gcc_assert (ok);
30146 return true;
30148 case V4HImode:
30149 if (!mmx_ok)
30150 return false;
30151 if (TARGET_SSE || TARGET_3DNOW_A)
30153 rtx x;
30155 val = gen_lowpart (SImode, val);
30156 x = gen_rtx_TRUNCATE (HImode, val);
30157 x = gen_rtx_VEC_DUPLICATE (mode, x);
30158 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30159 return true;
30161 goto widen;
30163 case V8QImode:
30164 if (!mmx_ok)
30165 return false;
30166 goto widen;
30168 case V8HImode:
30169 if (TARGET_SSE2)
30171 struct expand_vec_perm_d dperm;
30172 rtx tmp1, tmp2;
30174 permute:
30175 memset (&dperm, 0, sizeof (dperm));
30176 dperm.target = target;
30177 dperm.vmode = mode;
30178 dperm.nelt = GET_MODE_NUNITS (mode);
30179 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30181 /* Extend to SImode using a paradoxical SUBREG. */
30182 tmp1 = gen_reg_rtx (SImode);
30183 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30185 /* Insert the SImode value as low element of a V4SImode vector. */
30186 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30187 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30189 ok = (expand_vec_perm_1 (&dperm)
30190 || expand_vec_perm_broadcast_1 (&dperm));
30191 gcc_assert (ok);
30192 return ok;
30194 goto widen;
30196 case V16QImode:
30197 if (TARGET_SSE2)
30198 goto permute;
30199 goto widen;
30201 widen:
30202 /* Replicate the value once into the next wider mode and recurse. */
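/* E.g. a V16QImode broadcast of the byte B becomes a V8HImode broadcast
   of the HImode value (B << 8) | B.  */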
30204 enum machine_mode smode, wsmode, wvmode;
30205 rtx x;
30207 smode = GET_MODE_INNER (mode);
30208 wvmode = get_mode_wider_vector (mode);
30209 wsmode = GET_MODE_INNER (wvmode);
30211 val = convert_modes (wsmode, smode, val, true);
30212 x = expand_simple_binop (wsmode, ASHIFT, val,
30213 GEN_INT (GET_MODE_BITSIZE (smode)),
30214 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30215 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30217 x = gen_lowpart (wvmode, target);
30218 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
30219 gcc_assert (ok);
30220 return ok;
30223 case V16HImode:
30224 case V32QImode:
30226 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30227 rtx x = gen_reg_rtx (hvmode);
30229 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30230 gcc_assert (ok);
30232 x = gen_rtx_VEC_CONCAT (mode, x, x);
30233 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30235 return true;
30237 default:
30238 return false;
30242 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30243 whose ONE_VAR element is VAR, and other elements are zero. Return true
30244 if successful. */
30246 static bool
30247 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30248 rtx target, rtx var, int one_var)
30250 enum machine_mode vsimode;
30251 rtx new_target;
30252 rtx x, tmp;
30253 bool use_vector_set = false;
30255 switch (mode)
30257 case V2DImode:
30258 /* For SSE4.1, we normally use vector set. But if the second
30259 element is zero and inter-unit moves are OK, we use movq
30260 instead. */
30261 use_vector_set = (TARGET_64BIT
30262 && TARGET_SSE4_1
30263 && !(TARGET_INTER_UNIT_MOVES
30264 && one_var == 0));
30265 break;
30266 case V16QImode:
30267 case V4SImode:
30268 case V4SFmode:
30269 use_vector_set = TARGET_SSE4_1;
30270 break;
30271 case V8HImode:
30272 use_vector_set = TARGET_SSE2;
30273 break;
30274 case V4HImode:
30275 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30276 break;
30277 case V32QImode:
30278 case V16HImode:
30279 case V8SImode:
30280 case V8SFmode:
30281 case V4DFmode:
30282 use_vector_set = TARGET_AVX;
30283 break;
30284 case V4DImode:
30285 /* Use ix86_expand_vector_set in 64bit mode only. */
30286 use_vector_set = TARGET_AVX && TARGET_64BIT;
30287 break;
30288 default:
30289 break;
30292 if (use_vector_set)
30294 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30295 var = force_reg (GET_MODE_INNER (mode), var);
30296 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30297 return true;
30300 switch (mode)
30302 case V2SFmode:
30303 case V2SImode:
30304 if (!mmx_ok)
30305 return false;
30306 /* FALLTHRU */
30308 case V2DFmode:
30309 case V2DImode:
30310 if (one_var != 0)
30311 return false;
30312 var = force_reg (GET_MODE_INNER (mode), var);
30313 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30314 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30315 return true;
30317 case V4SFmode:
30318 case V4SImode:
30319 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30320 new_target = gen_reg_rtx (mode);
30321 else
30322 new_target = target;
30323 var = force_reg (GET_MODE_INNER (mode), var);
30324 x = gen_rtx_VEC_DUPLICATE (mode, var);
30325 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30326 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30327 if (one_var != 0)
30329 /* We need to shuffle the value to the correct position, so
30330 create a new pseudo to store the intermediate result. */
30332 /* With SSE2, we can use the integer shuffle insns. */
30333 if (mode != V4SFmode && TARGET_SSE2)
30335 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30336 const1_rtx,
30337 GEN_INT (one_var == 1 ? 0 : 1),
30338 GEN_INT (one_var == 2 ? 0 : 1),
30339 GEN_INT (one_var == 3 ? 0 : 1)));
30340 if (target != new_target)
30341 emit_move_insn (target, new_target);
30342 return true;
30345 /* Otherwise convert the intermediate result to V4SFmode and
30346 use the SSE1 shuffle instructions. */
30347 if (mode != V4SFmode)
30349 tmp = gen_reg_rtx (V4SFmode);
30350 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30352 else
30353 tmp = new_target;
30355 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30356 const1_rtx,
30357 GEN_INT (one_var == 1 ? 0 : 1),
30358 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30359 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30361 if (mode != V4SFmode)
30362 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30363 else if (tmp != target)
30364 emit_move_insn (target, tmp);
30366 else if (target != new_target)
30367 emit_move_insn (target, new_target);
30368 return true;
30370 case V8HImode:
30371 case V16QImode:
30372 vsimode = V4SImode;
30373 goto widen;
30374 case V4HImode:
30375 case V8QImode:
30376 if (!mmx_ok)
30377 return false;
30378 vsimode = V2SImode;
30379 goto widen;
30380 widen:
30381 if (one_var != 0)
30382 return false;
30384 /* Zero extend the variable element to SImode and recurse. */
30385 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30387 x = gen_reg_rtx (vsimode);
30388 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30389 var, one_var))
30390 gcc_unreachable ();
30392 emit_move_insn (target, gen_lowpart (mode, x));
30393 return true;
30395 default:
30396 return false;
30400 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30401 consisting of the values in VALS. It is known that all elements
30402 except ONE_VAR are constants. Return true if successful. */
30404 static bool
30405 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30406 rtx target, rtx vals, int one_var)
30408 rtx var = XVECEXP (vals, 0, one_var);
30409 enum machine_mode wmode;
30410 rtx const_vec, x;
30412 const_vec = copy_rtx (vals);
30413 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30414 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30416 switch (mode)
30418 case V2DFmode:
30419 case V2DImode:
30420 case V2SFmode:
30421 case V2SImode:
30422 /* For the two element vectors, it's just as easy to use
30423 the general case. */
30424 return false;
30426 case V4DImode:
30427 /* Use ix86_expand_vector_set in 64bit mode only. */
30428 if (!TARGET_64BIT)
30429 return false;
30430 case V4DFmode:
30431 case V8SFmode:
30432 case V8SImode:
30433 case V16HImode:
30434 case V32QImode:
30435 case V4SFmode:
30436 case V4SImode:
30437 case V8HImode:
30438 case V4HImode:
30439 break;
30441 case V16QImode:
30442 if (TARGET_SSE4_1)
30443 break;
30444 wmode = V8HImode;
30445 goto widen;
30446 case V8QImode:
30447 wmode = V4HImode;
30448 goto widen;
30449 widen:
30450 /* There's no way to set one QImode entry easily. Combine
30451 the variable value with its adjacent constant value, and
30452 promote it to an HImode set. */
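/* E.g. if element 3 of a V16QImode vector is the variable one, combine it
   with constant element 2 into a single HImode value (the constant in the
   low byte, the variable shifted into the high byte) and set HImode
   element 1 of the V8HImode image of the vector.  */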
30453 x = XVECEXP (vals, 0, one_var ^ 1);
30454 if (one_var & 1)
30456 var = convert_modes (HImode, QImode, var, true);
30457 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30458 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30459 x = GEN_INT (INTVAL (x) & 0xff);
30461 else
30463 var = convert_modes (HImode, QImode, var, true);
30464 x = gen_int_mode (INTVAL (x) << 8, HImode);
30466 if (x != const0_rtx)
30467 var = expand_simple_binop (HImode, IOR, var, x, var,
30468 1, OPTAB_LIB_WIDEN);
30470 x = gen_reg_rtx (wmode);
30471 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30472 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30474 emit_move_insn (target, gen_lowpart (mode, x));
30475 return true;
30477 default:
30478 return false;
30481 emit_move_insn (target, const_vec);
30482 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30483 return true;
30486 /* A subroutine of ix86_expand_vector_init_general. Use vector
30487 concatenate to handle the most general case: all values variable,
30488 and none identical. */
30490 static void
30491 ix86_expand_vector_init_concat (enum machine_mode mode,
30492 rtx target, rtx *ops, int n)
30494 enum machine_mode cmode, hmode = VOIDmode;
30495 rtx first[8], second[4];
30496 rtvec v;
30497 int i, j;
30499 switch (n)
30501 case 2:
30502 switch (mode)
30504 case V8SImode:
30505 cmode = V4SImode;
30506 break;
30507 case V8SFmode:
30508 cmode = V4SFmode;
30509 break;
30510 case V4DImode:
30511 cmode = V2DImode;
30512 break;
30513 case V4DFmode:
30514 cmode = V2DFmode;
30515 break;
30516 case V4SImode:
30517 cmode = V2SImode;
30518 break;
30519 case V4SFmode:
30520 cmode = V2SFmode;
30521 break;
30522 case V2DImode:
30523 cmode = DImode;
30524 break;
30525 case V2SImode:
30526 cmode = SImode;
30527 break;
30528 case V2DFmode:
30529 cmode = DFmode;
30530 break;
30531 case V2SFmode:
30532 cmode = SFmode;
30533 break;
30534 default:
30535 gcc_unreachable ();
30538 if (!register_operand (ops[1], cmode))
30539 ops[1] = force_reg (cmode, ops[1]);
30540 if (!register_operand (ops[0], cmode))
30541 ops[0] = force_reg (cmode, ops[0]);
30542 emit_insn (gen_rtx_SET (VOIDmode, target,
30543 gen_rtx_VEC_CONCAT (mode, ops[0],
30544 ops[1])));
30545 break;
30547 case 4:
30548 switch (mode)
30550 case V4DImode:
30551 cmode = V2DImode;
30552 break;
30553 case V4DFmode:
30554 cmode = V2DFmode;
30555 break;
30556 case V4SImode:
30557 cmode = V2SImode;
30558 break;
30559 case V4SFmode:
30560 cmode = V2SFmode;
30561 break;
30562 default:
30563 gcc_unreachable ();
30565 goto half;
30567 case 8:
30568 switch (mode)
30570 case V8SImode:
30571 cmode = V2SImode;
30572 hmode = V4SImode;
30573 break;
30574 case V8SFmode:
30575 cmode = V2SFmode;
30576 hmode = V4SFmode;
30577 break;
30578 default:
30579 gcc_unreachable ();
30581 goto half;
30583 half:
30584 /* FIXME: We process inputs backward to help RA. PR 36222. */
30585 i = n - 1;
30586 j = (n >> 1) - 1;
30587 for (; i > 0; i -= 2, j--)
30589 first[j] = gen_reg_rtx (cmode);
30590 v = gen_rtvec (2, ops[i - 1], ops[i]);
30591 ix86_expand_vector_init (false, first[j],
30592 gen_rtx_PARALLEL (cmode, v));
30595 n >>= 1;
30596 if (n > 2)
30598 gcc_assert (hmode != VOIDmode);
30599 for (i = j = 0; i < n; i += 2, j++)
30601 second[j] = gen_reg_rtx (hmode);
30602 ix86_expand_vector_init_concat (hmode, second [j],
30603 &first [i], 2);
30605 n >>= 1;
30606 ix86_expand_vector_init_concat (mode, target, second, n);
30608 else
30609 ix86_expand_vector_init_concat (mode, target, first, n);
30610 break;
30612 default:
30613 gcc_unreachable ();
30617 /* A subroutine of ix86_expand_vector_init_general. Use vector
30618 interleave to handle the most general case: all values variable,
30619 and none identical. */
30621 static void
30622 ix86_expand_vector_init_interleave (enum machine_mode mode,
30623 rtx target, rtx *ops, int n)
30625 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
30626 int i, j;
30627 rtx op0, op1;
30628 rtx (*gen_load_even) (rtx, rtx, rtx);
30629 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
30630 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
30632 switch (mode)
30634 case V8HImode:
30635 gen_load_even = gen_vec_setv8hi;
30636 gen_interleave_first_low = gen_vec_interleave_lowv4si;
30637 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30638 inner_mode = HImode;
30639 first_imode = V4SImode;
30640 second_imode = V2DImode;
30641 third_imode = VOIDmode;
30642 break;
30643 case V16QImode:
30644 gen_load_even = gen_vec_setv16qi;
30645 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
30646 gen_interleave_second_low = gen_vec_interleave_lowv4si;
30647 inner_mode = QImode;
30648 first_imode = V8HImode;
30649 second_imode = V4SImode;
30650 third_imode = V2DImode;
30651 break;
30652 default:
30653 gcc_unreachable ();
30656 for (i = 0; i < n; i++)
30658 /* Extend the odd element to SImode using a paradoxical SUBREG. */
30659 op0 = gen_reg_rtx (SImode);
30660 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
30662 /* Insert the SImode value as low element of V4SImode vector. */
30663 op1 = gen_reg_rtx (V4SImode);
30664 op0 = gen_rtx_VEC_MERGE (V4SImode,
30665 gen_rtx_VEC_DUPLICATE (V4SImode,
30666 op0),
30667 CONST0_RTX (V4SImode),
30668 const1_rtx);
30669 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
30671 /* Cast the V4SImode vector back to a vector in the original mode. */
30672 op0 = gen_reg_rtx (mode);
30673 emit_move_insn (op0, gen_lowpart (mode, op1));
30675 /* Load even elements into the second position. */
30676 emit_insn (gen_load_even (op0,
30677 force_reg (inner_mode,
30678 ops [i + i + 1]),
30679 const1_rtx));
30681 /* Cast vector to FIRST_IMODE vector. */
30682 ops[i] = gen_reg_rtx (first_imode);
30683 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
30686 /* Interleave low FIRST_IMODE vectors. */
30687 for (i = j = 0; i < n; i += 2, j++)
30689 op0 = gen_reg_rtx (first_imode);
30690 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
30692 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
30693 ops[j] = gen_reg_rtx (second_imode);
30694 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
30697 /* Interleave low SECOND_IMODE vectors. */
30698 switch (second_imode)
30700 case V4SImode:
30701 for (i = j = 0; i < n / 2; i += 2, j++)
30703 op0 = gen_reg_rtx (second_imode);
30704 emit_insn (gen_interleave_second_low (op0, ops[i],
30705 ops[i + 1]));
30707 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
30708 vector. */
30709 ops[j] = gen_reg_rtx (third_imode);
30710 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
30712 second_imode = V2DImode;
30713 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30714 /* FALLTHRU */
30716 case V2DImode:
30717 op0 = gen_reg_rtx (second_imode);
30718 emit_insn (gen_interleave_second_low (op0, ops[0],
30719 ops[1]));
30721 /* Cast the SECOND_IMODE vector back to a vector in the original
30722 mode. */
30723 emit_insn (gen_rtx_SET (VOIDmode, target,
30724 gen_lowpart (mode, op0)));
30725 break;
30727 default:
30728 gcc_unreachable ();
30732 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
30733 all values variable, and none identical. */
30735 static void
30736 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
30737 rtx target, rtx vals)
30739 rtx ops[32], op0, op1;
30740 enum machine_mode half_mode = VOIDmode;
30741 int n, i;
30743 switch (mode)
30745 case V2SFmode:
30746 case V2SImode:
30747 if (!mmx_ok && !TARGET_SSE)
30748 break;
30749 /* FALLTHRU */
30751 case V8SFmode:
30752 case V8SImode:
30753 case V4DFmode:
30754 case V4DImode:
30755 case V4SFmode:
30756 case V4SImode:
30757 case V2DFmode:
30758 case V2DImode:
30759 n = GET_MODE_NUNITS (mode);
30760 for (i = 0; i < n; i++)
30761 ops[i] = XVECEXP (vals, 0, i);
30762 ix86_expand_vector_init_concat (mode, target, ops, n);
30763 return;
30765 case V32QImode:
30766 half_mode = V16QImode;
30767 goto half;
30769 case V16HImode:
30770 half_mode = V8HImode;
30771 goto half;
30773 half:
30774 n = GET_MODE_NUNITS (mode);
30775 for (i = 0; i < n; i++)
30776 ops[i] = XVECEXP (vals, 0, i);
30777 op0 = gen_reg_rtx (half_mode);
30778 op1 = gen_reg_rtx (half_mode);
30779 ix86_expand_vector_init_interleave (half_mode, op0, ops,
30780 n >> 2);
30781 ix86_expand_vector_init_interleave (half_mode, op1,
30782 &ops [n >> 1], n >> 2);
30783 emit_insn (gen_rtx_SET (VOIDmode, target,
30784 gen_rtx_VEC_CONCAT (mode, op0, op1)));
30785 return;
30787 case V16QImode:
30788 if (!TARGET_SSE4_1)
30789 break;
30790 /* FALLTHRU */
30792 case V8HImode:
30793 if (!TARGET_SSE2)
30794 break;
30796 /* Don't use ix86_expand_vector_init_interleave if we can't
30797 move from GPR to SSE register directly. */
30798 if (!TARGET_INTER_UNIT_MOVES)
30799 break;
30801 n = GET_MODE_NUNITS (mode);
30802 for (i = 0; i < n; i++)
30803 ops[i] = XVECEXP (vals, 0, i);
30804 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
30805 return;
30807 case V4HImode:
30808 case V8QImode:
30809 break;
30811 default:
30812 gcc_unreachable ();
30816 int i, j, n_elts, n_words, n_elt_per_word;
30817 enum machine_mode inner_mode;
30818 rtx words[4], shift;
30820 inner_mode = GET_MODE_INNER (mode);
30821 n_elts = GET_MODE_NUNITS (mode);
30822 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
30823 n_elt_per_word = n_elts / n_words;
30824 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
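/* Pack N_ELT_PER_WORD elements into each word-sized integer, taking the
   highest-numbered element of a word first so that element K of the word
   ends up at bit position K * BITSIZE, matching the vector's in-register
   layout, then assemble the vector from the words below.  */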
30826 for (i = 0; i < n_words; ++i)
30828 rtx word = NULL_RTX;
30830 for (j = 0; j < n_elt_per_word; ++j)
30832 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
30833 elt = convert_modes (word_mode, inner_mode, elt, true);
30835 if (j == 0)
30836 word = elt;
30837 else
30839 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
30840 word, 1, OPTAB_LIB_WIDEN);
30841 word = expand_simple_binop (word_mode, IOR, word, elt,
30842 word, 1, OPTAB_LIB_WIDEN);
30846 words[i] = word;
30849 if (n_words == 1)
30850 emit_move_insn (target, gen_lowpart (mode, words[0]));
30851 else if (n_words == 2)
30853 rtx tmp = gen_reg_rtx (mode);
30854 emit_clobber (tmp);
30855 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
30856 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
30857 emit_move_insn (target, tmp);
30859 else if (n_words == 4)
30861 rtx tmp = gen_reg_rtx (V4SImode);
30862 gcc_assert (word_mode == SImode);
30863 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
30864 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
30865 emit_move_insn (target, gen_lowpart (mode, tmp));
30867 else
30868 gcc_unreachable ();
30872 /* Initialize vector TARGET via VALS. Suppress the use of MMX
30873 instructions unless MMX_OK is true. */
30875 void
30876 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
30878 enum machine_mode mode = GET_MODE (target);
30879 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30880 int n_elts = GET_MODE_NUNITS (mode);
30881 int n_var = 0, one_var = -1;
30882 bool all_same = true, all_const_zero = true;
30883 int i;
30884 rtx x;
30886 for (i = 0; i < n_elts; ++i)
30888 x = XVECEXP (vals, 0, i);
30889 if (!(CONST_INT_P (x)
30890 || GET_CODE (x) == CONST_DOUBLE
30891 || GET_CODE (x) == CONST_FIXED))
30892 n_var++, one_var = i;
30893 else if (x != CONST0_RTX (inner_mode))
30894 all_const_zero = false;
30895 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
30896 all_same = false;
30899 /* Constants are best loaded from the constant pool. */
30900 if (n_var == 0)
30902 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
30903 return;
30906 /* If all values are identical, broadcast the value. */
30907 if (all_same
30908 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
30909 XVECEXP (vals, 0, 0)))
30910 return;
30912 /* Values where only one field is non-constant are best loaded from
30913 the pool and overwritten via move later. */
30914 if (n_var == 1)
30916 if (all_const_zero
30917 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
30918 XVECEXP (vals, 0, one_var),
30919 one_var))
30920 return;
30922 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
30923 return;
30926 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
30929 void
30930 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
30932 enum machine_mode mode = GET_MODE (target);
30933 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30934 enum machine_mode half_mode;
30935 bool use_vec_merge = false;
30936 rtx tmp;
30937 static rtx (*gen_extract[6][2]) (rtx, rtx)
30939 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
30940 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
30941 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
30942 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
30943 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
30944 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
30946 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
30948 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
30949 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
30950 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
30951 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
30952 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
30953 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
30955 int i, j, n;
30957 switch (mode)
30959 case V2SFmode:
30960 case V2SImode:
30961 if (mmx_ok)
30963 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
30964 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
30965 if (elt == 0)
30966 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
30967 else
30968 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
30969 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30970 return;
30972 break;
30974 case V2DImode:
30975 use_vec_merge = TARGET_SSE4_1;
30976 if (use_vec_merge)
30977 break;
30979 case V2DFmode:
30981 rtx op0, op1;
30983 /* For the two element vectors, we implement a VEC_CONCAT with
30984 the extraction of the other element. */
30986 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
30987 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
30989 if (elt == 0)
30990 op0 = val, op1 = tmp;
30991 else
30992 op0 = tmp, op1 = val;
30994 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
30995 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30997 return;
30999 case V4SFmode:
31000 use_vec_merge = TARGET_SSE4_1;
31001 if (use_vec_merge)
31002 break;
31004 switch (elt)
31006 case 0:
31007 use_vec_merge = true;
31008 break;
31010 case 1:
31011 /* tmp = target = A B C D */
31012 tmp = copy_to_reg (target);
31013 /* target = A A B B */
31014 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31015 /* target = X A B B */
31016 ix86_expand_vector_set (false, target, val, 0);
31017 /* target = A X C D */
31018 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31019 const1_rtx, const0_rtx,
31020 GEN_INT (2+4), GEN_INT (3+4)));
31021 return;
31023 case 2:
31024 /* tmp = target = A B C D */
31025 tmp = copy_to_reg (target);
31026 /* tmp = X B C D */
31027 ix86_expand_vector_set (false, tmp, val, 0);
31028 /* target = A B X D */
31029 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31030 const0_rtx, const1_rtx,
31031 GEN_INT (0+4), GEN_INT (3+4)));
31032 return;
31034 case 3:
31035 /* tmp = target = A B C D */
31036 tmp = copy_to_reg (target);
31037 /* tmp = X B C D */
31038 ix86_expand_vector_set (false, tmp, val, 0);
31040 /* target = A B C X */
31040 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31041 const0_rtx, const1_rtx,
31042 GEN_INT (2+4), GEN_INT (0+4)));
31043 return;
31045 default:
31046 gcc_unreachable ();
31048 break;
31050 case V4SImode:
31051 use_vec_merge = TARGET_SSE4_1;
31052 if (use_vec_merge)
31053 break;
31055 /* Element 0 handled by vec_merge below. */
31056 if (elt == 0)
31058 use_vec_merge = true;
31059 break;
31062 if (TARGET_SSE2)
31064 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31065 store into element 0, then shuffle them back. */
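/* ORDER describes the transposition of elements 0 and ELT; a
   transposition is its own inverse, so the same pshufd both brings ELT
   into position 0 and moves it back afterwards.  */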
31067 rtx order[4];
31069 order[0] = GEN_INT (elt);
31070 order[1] = const1_rtx;
31071 order[2] = const2_rtx;
31072 order[3] = GEN_INT (3);
31073 order[elt] = const0_rtx;
31075 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31076 order[1], order[2], order[3]));
31078 ix86_expand_vector_set (false, target, val, 0);
31080 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31081 order[1], order[2], order[3]));
31083 else
31085 /* For SSE1, we have to reuse the V4SF code. */
31086 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31087 gen_lowpart (SFmode, val), elt);
31089 return;
31091 case V8HImode:
31092 use_vec_merge = TARGET_SSE2;
31093 break;
31094 case V4HImode:
31095 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31096 break;
31098 case V16QImode:
31099 use_vec_merge = TARGET_SSE4_1;
31100 break;
31102 case V8QImode:
31103 break;
31105 case V32QImode:
31106 half_mode = V16QImode;
31107 j = 0;
31108 n = 16;
31109 goto half;
31111 case V16HImode:
31112 half_mode = V8HImode;
31113 j = 1;
31114 n = 8;
31115 goto half;
31117 case V8SImode:
31118 half_mode = V4SImode;
31119 j = 2;
31120 n = 4;
31121 goto half;
31123 case V4DImode:
31124 half_mode = V2DImode;
31125 j = 3;
31126 n = 2;
31127 goto half;
31129 case V8SFmode:
31130 half_mode = V4SFmode;
31131 j = 4;
31132 n = 4;
31133 goto half;
31135 case V4DFmode:
31136 half_mode = V2DFmode;
31137 j = 5;
31138 n = 2;
31139 goto half;
31141 half:
31142 /* Compute offset. */
31143 i = elt / n;
31144 elt %= n;
31146 gcc_assert (i <= 1);
31148 /* Extract the half. */
31149 tmp = gen_reg_rtx (half_mode);
31150 emit_insn (gen_extract[j][i] (tmp, target));
31152 /* Put val in tmp at elt. */
31153 ix86_expand_vector_set (false, tmp, val, elt);
31155 /* Put it back. */
31156 emit_insn (gen_insert[j][i] (target, target, tmp));
31157 return;
31159 default:
31160 break;
31163 if (use_vec_merge)
31165 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31166 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31167 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31169 else
31171 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31173 emit_move_insn (mem, target);
31175 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31176 emit_move_insn (tmp, val);
31178 emit_move_insn (target, mem);
31182 void
31183 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31185 enum machine_mode mode = GET_MODE (vec);
31186 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31187 bool use_vec_extr = false;
31188 rtx tmp;
31190 switch (mode)
31192 case V2SImode:
31193 case V2SFmode:
31194 if (!mmx_ok)
31195 break;
31196 /* FALLTHRU */
31198 case V2DFmode:
31199 case V2DImode:
31200 use_vec_extr = true;
31201 break;
31203 case V4SFmode:
31204 use_vec_extr = TARGET_SSE4_1;
31205 if (use_vec_extr)
31206 break;
31208 switch (elt)
31210 case 0:
31211 tmp = vec;
31212 break;
31214 case 1:
31215 case 3:
31216 tmp = gen_reg_rtx (mode);
31217 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31218 GEN_INT (elt), GEN_INT (elt),
31219 GEN_INT (elt+4), GEN_INT (elt+4)));
31220 break;
31222 case 2:
31223 tmp = gen_reg_rtx (mode);
31224 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31225 break;
31227 default:
31228 gcc_unreachable ();
31230 vec = tmp;
31231 use_vec_extr = true;
31232 elt = 0;
31233 break;
31235 case V4SImode:
31236 use_vec_extr = TARGET_SSE4_1;
31237 if (use_vec_extr)
31238 break;
31240 if (TARGET_SSE2)
31242 switch (elt)
31244 case 0:
31245 tmp = vec;
31246 break;
31248 case 1:
31249 case 3:
31250 tmp = gen_reg_rtx (mode);
31251 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31252 GEN_INT (elt), GEN_INT (elt),
31253 GEN_INT (elt), GEN_INT (elt)));
31254 break;
31256 case 2:
31257 tmp = gen_reg_rtx (mode);
31258 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31259 break;
31261 default:
31262 gcc_unreachable ();
31264 vec = tmp;
31265 use_vec_extr = true;
31266 elt = 0;
31268 else
31270 /* For SSE1, we have to reuse the V4SF code. */
31271 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31272 gen_lowpart (V4SFmode, vec), elt);
31273 return;
31275 break;
31277 case V8HImode:
31278 use_vec_extr = TARGET_SSE2;
31279 break;
31280 case V4HImode:
31281 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31282 break;
31284 case V16QImode:
31285 use_vec_extr = TARGET_SSE4_1;
31286 break;
31288 case V8QImode:
31289 /* ??? Could extract the appropriate HImode element and shift. */
31290 default:
31291 break;
31294 if (use_vec_extr)
31296 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31297 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31299 /* Let the rtl optimizers know about the zero extension performed. */
31300 if (inner_mode == QImode || inner_mode == HImode)
31302 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31303 target = gen_lowpart (SImode, target);
31306 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31308 else
31310 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31312 emit_move_insn (mem, vec);
31314 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31315 emit_move_insn (target, tmp);
31319 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31320 pattern to reduce; DEST is the destination; IN is the input vector. */
31322 void
31323 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31325 rtx tmp1, tmp2, tmp3;
31327 tmp1 = gen_reg_rtx (V4SFmode);
31328 tmp2 = gen_reg_rtx (V4SFmode);
31329 tmp3 = gen_reg_rtx (V4SFmode);
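/* With IN = [a b c d], and writing x op y for the reduction FN:
   tmp1 = movhlps (in, in)                -> [c d c d]
   tmp2 = fn (tmp1, in)                   -> [a op c, b op d, . .]
   tmp3 = shufps (tmp2, tmp2, 1, 1, 1, 1) -> [b op d, ...]
   dest = fn (tmp2, tmp3): element 0 holds (a op c) op (b op d).  */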
31331 emit_insn (gen_sse_movhlps (tmp1, in, in));
31332 emit_insn (fn (tmp2, tmp1, in));
31334 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31335 const1_rtx, const1_rtx,
31336 GEN_INT (1+4), GEN_INT (1+4)));
31337 emit_insn (fn (dest, tmp2, tmp3));
31340 /* Target hook for scalar_mode_supported_p. */
31341 static bool
31342 ix86_scalar_mode_supported_p (enum machine_mode mode)
31344 if (DECIMAL_FLOAT_MODE_P (mode))
31345 return default_decimal_float_supported_p ();
31346 else if (mode == TFmode)
31347 return true;
31348 else
31349 return default_scalar_mode_supported_p (mode);
31352 /* Implements target hook vector_mode_supported_p. */
31353 static bool
31354 ix86_vector_mode_supported_p (enum machine_mode mode)
31356 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31357 return true;
31358 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31359 return true;
31360 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31361 return true;
31362 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31363 return true;
31364 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31365 return true;
31366 return false;
31369 /* Target hook for c_mode_for_suffix. */
31370 static enum machine_mode
31371 ix86_c_mode_for_suffix (char suffix)
31373 if (suffix == 'q')
31374 return TFmode;
31375 if (suffix == 'w')
31376 return XFmode;
31378 return VOIDmode;
31381 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31383 We do this in the new i386 backend to maintain source compatibility
31384 with the old cc0-based compiler. */
31386 static tree
31387 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31388 tree inputs ATTRIBUTE_UNUSED,
31389 tree clobbers)
31391 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31392 clobbers);
31393 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31394 clobbers);
31395 return clobbers;
31398 /* Implements the target hook targetm.asm.encode_section_info. This
31399 is not used by NetWare. */
31401 static void ATTRIBUTE_UNUSED
31402 ix86_encode_section_info (tree decl, rtx rtl, int first)
31404 default_encode_section_info (decl, rtl, first);
31406 if (TREE_CODE (decl) == VAR_DECL
31407 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31408 && ix86_in_large_data_p (decl))
31409 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31412 /* Worker function for REVERSE_CONDITION. */
31414 enum rtx_code
31415 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31417 return (mode != CCFPmode && mode != CCFPUmode
31418 ? reverse_condition (code)
31419 : reverse_condition_maybe_unordered (code));
31422 /* Output code to perform an x87 FP register move, from OPERANDS[1]
31423 to OPERANDS[0]. */
31425 const char *
31426 output_387_reg_move (rtx insn, rtx *operands)
31428 if (REG_P (operands[0]))
31430 if (REG_P (operands[1])
31431 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31433 if (REGNO (operands[0]) == FIRST_STACK_REG)
31434 return output_387_ffreep (operands, 0);
31435 return "fstp\t%y0";
31437 if (STACK_TOP_P (operands[0]))
31438 return "fld%Z1\t%y1";
31439 return "fst\t%y0";
31441 else if (MEM_P (operands[0]))
31443 gcc_assert (REG_P (operands[1]));
31444 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31445 return "fstp%Z0\t%y0";
31446 else
31448 /* There is no non-popping store to memory for XFmode.
31449 So if we need one, follow the store with a load. */
31450 if (GET_MODE (operands[0]) == XFmode)
31451 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31452 else
31453 return "fst%Z0\t%y0";
31456 else
31457 gcc_unreachable();
31460 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
31461 the FP status register is set. */
31463 void
31464 ix86_emit_fp_unordered_jump (rtx label)
31466 rtx reg = gen_reg_rtx (HImode);
31467 rtx temp;
31469 emit_insn (gen_x86_fnstsw_1 (reg));
31471 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31473 emit_insn (gen_x86_sahf_1 (reg));
31475 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31476 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31478 else
31480 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31482 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31483 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31486 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31487 gen_rtx_LABEL_REF (VOIDmode, label),
31488 pc_rtx);
31489 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31491 emit_jump_insn (temp);
31492 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31495 /* Output code to perform a log1p XFmode calculation. */
31497 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31499 rtx label1 = gen_label_rtx ();
31500 rtx label2 = gen_label_rtx ();
31502 rtx tmp = gen_reg_rtx (XFmode);
31503 rtx tmp2 = gen_reg_rtx (XFmode);
31504 rtx test;
31506 emit_insn (gen_absxf2 (tmp, op1));
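/* fyl2xp1 is only specified for |op1| < 1 - sqrt(2)/2 (about 0.29289);
   for larger arguments branch to the fallback below, which computes
   log1p (op1) as fyl2x of 1.0 + op1 instead.  */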
31507 test = gen_rtx_GE (VOIDmode, tmp,
31508 CONST_DOUBLE_FROM_REAL_VALUE (
31509 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31510 XFmode));
31511 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31513 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31514 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
31515 emit_jump (label2);
31517 emit_label (label1);
31518 emit_move_insn (tmp, CONST1_RTX (XFmode));
31519 emit_insn (gen_addxf3 (tmp, op1, tmp));
31520 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31521 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
31523 emit_label (label2);
31526 /* Output code to perform a Newton-Raphson approximation of a single precision
31527 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31529 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
31531 rtx x0, x1, e0, e1, two;
31533 x0 = gen_reg_rtx (mode);
31534 e0 = gen_reg_rtx (mode);
31535 e1 = gen_reg_rtx (mode);
31536 x1 = gen_reg_rtx (mode);
31538 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
31540 if (VECTOR_MODE_P (mode))
31541 two = ix86_build_const_vector (mode, true, two);
31543 two = force_reg (mode, two);
31545 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
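/* If the hardware reciprocal estimate is off by a relative error e,
   i.e. rcp(b) = (1 - e) / b, the expression above evaluates to
   a/b * (1 - e*e), so this single Newton-Raphson step roughly doubles
   the number of correct bits of the estimate.  */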
31547 /* x0 = rcp(b) estimate */
31548 emit_insn (gen_rtx_SET (VOIDmode, x0,
31549 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
31550 UNSPEC_RCP)));
31551 /* e0 = x0 * a */
31552 emit_insn (gen_rtx_SET (VOIDmode, e0,
31553 gen_rtx_MULT (mode, x0, a)));
31554 /* e1 = x0 * b */
31555 emit_insn (gen_rtx_SET (VOIDmode, e1,
31556 gen_rtx_MULT (mode, x0, b)));
31557 /* x1 = 2. - e1 */
31558 emit_insn (gen_rtx_SET (VOIDmode, x1,
31559 gen_rtx_MINUS (mode, two, e1)));
31560 /* res = e0 * x1 */
31561 emit_insn (gen_rtx_SET (VOIDmode, res,
31562 gen_rtx_MULT (mode, e0, x1)));
31565 /* Output code to perform a Newton-Raphson approximation of a
31566 single precision floating point [reciprocal] square root. */
31568 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
31569 bool recip)
31571 rtx x0, e0, e1, e2, e3, mthree, mhalf;
31572 REAL_VALUE_TYPE r;
31574 x0 = gen_reg_rtx (mode);
31575 e0 = gen_reg_rtx (mode);
31576 e1 = gen_reg_rtx (mode);
31577 e2 = gen_reg_rtx (mode);
31578 e3 = gen_reg_rtx (mode);
31580 real_from_integer (&r, VOIDmode, -3, -1, 0);
31581 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31583 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
31584 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31586 if (VECTOR_MODE_P (mode))
31588 mthree = ix86_build_const_vector (mode, true, mthree);
31589 mhalf = ix86_build_const_vector (mode, true, mhalf);
31592 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
31593 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
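/* This is the standard Newton-Raphson step for rsqrt,
   x1 = 0.5 * x0 * (3.0 - a * x0 * x0),
   written with the constants negated (-3.0 and -0.5) to match the
   PLUS/MULT sequence below; the sqrt variant folds in an extra factor
   of A via E0 = A * X0, since sqrt(a) = a * rsqrt(a).  */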
31595 /* x0 = rsqrt(a) estimate */
31596 emit_insn (gen_rtx_SET (VOIDmode, x0,
31597 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
31598 UNSPEC_RSQRT)));
31600 /* If (a == 0.0), filter out the infinite rsqrt estimate to prevent a NaN result for sqrt (0.0). */
31601 if (!recip)
31603 rtx zero, mask;
31605 zero = gen_reg_rtx (mode);
31606 mask = gen_reg_rtx (mode);
31608 zero = force_reg (mode, CONST0_RTX(mode));
31609 emit_insn (gen_rtx_SET (VOIDmode, mask,
31610 gen_rtx_NE (mode, zero, a)));
31612 emit_insn (gen_rtx_SET (VOIDmode, x0,
31613 gen_rtx_AND (mode, x0, mask)));
31616 /* e0 = x0 * a */
31617 emit_insn (gen_rtx_SET (VOIDmode, e0,
31618 gen_rtx_MULT (mode, x0, a)));
31619 /* e1 = e0 * x0 */
31620 emit_insn (gen_rtx_SET (VOIDmode, e1,
31621 gen_rtx_MULT (mode, e0, x0)));
31623 /* e2 = e1 - 3. */
31624 mthree = force_reg (mode, mthree);
31625 emit_insn (gen_rtx_SET (VOIDmode, e2,
31626 gen_rtx_PLUS (mode, e1, mthree)));
31628 mhalf = force_reg (mode, mhalf);
31629 if (recip)
31630 /* e3 = -.5 * x0 */
31631 emit_insn (gen_rtx_SET (VOIDmode, e3,
31632 gen_rtx_MULT (mode, x0, mhalf)));
31633 else
31634 /* e3 = -.5 * e0 */
31635 emit_insn (gen_rtx_SET (VOIDmode, e3,
31636 gen_rtx_MULT (mode, e0, mhalf)));
31637 /* ret = e2 * e3 */
31638 emit_insn (gen_rtx_SET (VOIDmode, res,
31639 gen_rtx_MULT (mode, e2, e3)));
31642 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
31644 static void ATTRIBUTE_UNUSED
31645 i386_solaris_elf_named_section (const char *name, unsigned int flags,
31646 tree decl)
31648 /* With Binutils 2.15, the "@unwind" marker must be specified on
31649 every occurrence of the ".eh_frame" section, not just the first
31650 one. */
31651 if (TARGET_64BIT
31652 && strcmp (name, ".eh_frame") == 0)
31654 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
31655 flags & SECTION_WRITE ? "aw" : "a");
31656 return;
31658 default_elf_asm_named_section (name, flags, decl);
31661 /* Return the mangling of TYPE if it is an extended fundamental type. */
31663 static const char *
31664 ix86_mangle_type (const_tree type)
31666 type = TYPE_MAIN_VARIANT (type);
31668 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
31669 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
31670 return NULL;
31672 switch (TYPE_MODE (type))
31674 case TFmode:
31675 /* __float128 is "g". */
31676 return "g";
31677 case XFmode:
31678 /* "long double" or __float80 is "e". */
31679 return "e";
31680 default:
31681 return NULL;
31685 /* For 32-bit code we can save PIC register setup by using
31686 the hidden function __stack_chk_fail_local instead of calling
31687 __stack_chk_fail directly. 64-bit code doesn't need to set up a PIC
31688 register, so it is better to call __stack_chk_fail directly. */
31690 static tree
31691 ix86_stack_protect_fail (void)
31693 return TARGET_64BIT
31694 ? default_external_stack_protect_fail ()
31695 : default_hidden_stack_protect_fail ();
31698 /* Select a format to encode pointers in exception handling data. CODE
31699 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
31700 true if the symbol may be affected by dynamic relocations.
31702 ??? All x86 object file formats are capable of representing this.
31703 After all, the relocation needed is the same as for the call insn.
31704 Whether or not a particular assembler allows us to enter such, I
31705 guess we'll have to see. */
31707 asm_preferred_eh_data_format (int code, int global)
31709 if (flag_pic)
31711 int type = DW_EH_PE_sdata8;
31712 if (!TARGET_64BIT
31713 || ix86_cmodel == CM_SMALL_PIC
31714 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
31715 type = DW_EH_PE_sdata4;
31716 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
31718 if (ix86_cmodel == CM_SMALL
31719 || (ix86_cmodel == CM_MEDIUM && code))
31720 return DW_EH_PE_udata4;
31721 return DW_EH_PE_absptr;
31724 /* Expand copysign: copy the sign bit of SIGN onto the positive value ABS_VALUE,
31725 storing the result in RESULT. If MASK is non-null, it shall be a mask that masks out
31726 the sign-bit. */
31727 static void
31728 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
31730 enum machine_mode mode = GET_MODE (sign);
31731 rtx sgn = gen_reg_rtx (mode);
31732 if (mask == NULL_RTX)
31734 enum machine_mode vmode;
31736 if (mode == SFmode)
31737 vmode = V4SFmode;
31738 else if (mode == DFmode)
31739 vmode = V2DFmode;
31740 else
31741 vmode = mode;
31743 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
31744 if (!VECTOR_MODE_P (mode))
31746 /* We need to generate a scalar mode mask in this case. */
31747 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31748 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31749 mask = gen_reg_rtx (mode);
31750 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31753 else
31754 mask = gen_rtx_NOT (mode, mask);
31755 emit_insn (gen_rtx_SET (VOIDmode, sgn,
31756 gen_rtx_AND (mode, mask, sign)));
31757 emit_insn (gen_rtx_SET (VOIDmode, result,
31758 gen_rtx_IOR (mode, abs_value, sgn)));
31761 /* Expand fabs (OP0) and return a new rtx that holds the result. The
31762 mask for masking out the sign-bit is stored in *SMASK, if that is
31763 non-null. */
31764 static rtx
31765 ix86_expand_sse_fabs (rtx op0, rtx *smask)
31767 enum machine_mode vmode, mode = GET_MODE (op0);
31768 rtx xa, mask;
31770 xa = gen_reg_rtx (mode);
31771 if (mode == SFmode)
31772 vmode = V4SFmode;
31773 else if (mode == DFmode)
31774 vmode = V2DFmode;
31775 else
31776 vmode = mode;
31777 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
31778 if (!VECTOR_MODE_P (mode))
31780 /* We need to generate a scalar mode mask in this case. */
31781 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31782 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31783 mask = gen_reg_rtx (mode);
31784 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31786 emit_insn (gen_rtx_SET (VOIDmode, xa,
31787 gen_rtx_AND (mode, op0, mask)));
31789 if (smask)
31790 *smask = mask;
31792 return xa;
31795 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
31796 swapping the operands if SWAP_OPERANDS is true. The expanded
31797 code is a forward jump to a newly created label in case the
31798 comparison is true. The generated label rtx is returned. */
31799 static rtx
31800 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
31801 bool swap_operands)
31803 rtx label, tmp;
31805 if (swap_operands)
31807 tmp = op0;
31808 op0 = op1;
31809 op1 = tmp;
31812 label = gen_label_rtx ();
31813 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
31814 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31815 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
31816 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
31817 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
31818 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
31819 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
31820 JUMP_LABEL (tmp) = label;
31822 return label;
31825 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
31826 using comparison code CODE. Operands are swapped for the comparison if
31827 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
31828 static rtx
31829 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
31830 bool swap_operands)
31832 enum machine_mode mode = GET_MODE (op0);
31833 rtx mask = gen_reg_rtx (mode);
31835 if (swap_operands)
31837 rtx tmp = op0;
31838 op0 = op1;
31839 op1 = tmp;
31842 if (mode == DFmode)
31843 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
31844 gen_rtx_fmt_ee (code, mode, op0, op1)));
31845 else
31846 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
31847 gen_rtx_fmt_ee (code, mode, op0, op1)));
31849 return mask;
31852 /* Generate and return a rtx of mode MODE for 2**n where n is the number
31853 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
31854 static rtx
31855 ix86_gen_TWO52 (enum machine_mode mode)
31857 REAL_VALUE_TYPE TWO52r;
31858 rtx TWO52;
31860 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
31861 TWO52 = const_double_from_real_value (TWO52r, mode);
31862 TWO52 = force_reg (mode, TWO52);
31864 return TWO52;
31867 /* Expand SSE sequence for computing lround from OP1 storing
31868 into OP0. */
31869 void
31870 ix86_expand_lround (rtx op0, rtx op1)
31872 /* C code for the stuff we're doing below:
31873 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
31874 return (long)tmp;
31876 enum machine_mode mode = GET_MODE (op1);
31877 const struct real_format *fmt;
31878 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
31879 rtx adj;
31881 /* load nextafter (0.5, 0.0) */
31882 fmt = REAL_MODE_FORMAT (mode);
31883 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
31884 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
31886 /* adj = copysign (0.5, op1) */
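/* ADJ is really nextafter (0.5, 0.0), the largest representable value
   below 0.5, so that inputs whose fractional part is just under 0.5 are
   not pushed across the halfway point by rounding in the addition
   below.  */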
31887 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
31888 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
31890 /* adj = op1 + adj */
31891 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
31893 /* op0 = (imode)adj */
31894 expand_fix (op0, adj, 0);
31897 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
31898 into OPERAND0. */
31899 void
31900 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
31902 /* C code for the stuff we're doing below (for do_floor):
31903 xi = (long)op1;
31904 xi -= (double)xi > op1 ? 1 : 0;
31905 return xi;
31907 enum machine_mode fmode = GET_MODE (op1);
31908 enum machine_mode imode = GET_MODE (op0);
31909 rtx ireg, freg, label, tmp;
31911 /* reg = (long)op1 */
31912 ireg = gen_reg_rtx (imode);
31913 expand_fix (ireg, op1, 0);
31915 /* freg = (double)reg */
31916 freg = gen_reg_rtx (fmode);
31917 expand_float (freg, ireg, 0);
31919 /* ireg = (freg > op1) ? ireg - 1 : ireg */
31920 label = ix86_expand_sse_compare_and_jump (UNLE,
31921 freg, op1, !do_floor);
31922 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
31923 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
31924 emit_move_insn (ireg, tmp);
31926 emit_label (label);
31927 LABEL_NUSES (label) = 1;
31929 emit_move_insn (op0, ireg);
31932 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
31933 result in OPERAND0. */
31934 void
31935 ix86_expand_rint (rtx operand0, rtx operand1)
31937 /* C code for the stuff we're doing below:
31938 xa = fabs (operand1);
31939 if (!isless (xa, 2**52))
31940 return operand1;
31941 xa = xa + 2**52 - 2**52;
31942 return copysign (xa, operand1);
31944 enum machine_mode mode = GET_MODE (operand0);
31945 rtx res, xa, label, TWO52, mask;
31947 res = gen_reg_rtx (mode);
31948 emit_move_insn (res, operand1);
31950 /* xa = abs (operand1) */
31951 xa = ix86_expand_sse_fabs (res, &mask);
31953 /* if (!isless (xa, TWO52)) goto label; */
31954 TWO52 = ix86_gen_TWO52 (mode);
31955 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
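/* For |xa| < TWO52 (2**52 for DFmode, 2**23 for SFmode) the addition
   below pushes all fraction bits out of the significand, so the sum is
   rounded to an integer using the current rounding mode; subtracting
   TWO52 again recovers that integer exactly.  That is precisely the
   behaviour rint requires.  */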
31957 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31958 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
31960 ix86_sse_copysign_to_positive (res, xa, res, mask);
31962 emit_label (label);
31963 LABEL_NUSES (label) = 1;
31965 emit_move_insn (operand0, res);
31968 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
31969 into OPERAND0. */
31970 void
31971 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
31973 /* C code for the stuff we expand below.
31974 double xa = fabs (x), x2;
31975 if (!isless (xa, TWO52))
31976 return x;
31977 xa = xa + TWO52 - TWO52;
31978 x2 = copysign (xa, x);
31979 Compensate. Floor:
31980 if (x2 > x)
31981 x2 -= 1;
31982 Compensate. Ceil:
31983 if (x2 < x)
31984 x2 -= -1;
31985 return x2;
31987 enum machine_mode mode = GET_MODE (operand0);
31988 rtx xa, TWO52, tmp, label, one, res, mask;
31990 TWO52 = ix86_gen_TWO52 (mode);
31992 /* Temporary for holding the result, initialized to the input
31993 operand to ease control flow. */
31994 res = gen_reg_rtx (mode);
31995 emit_move_insn (res, operand1);
31997 /* xa = abs (operand1) */
31998 xa = ix86_expand_sse_fabs (res, &mask);
32000 /* if (!isless (xa, TWO52)) goto label; */
32001 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32003 /* xa = xa + TWO52 - TWO52; */
32004 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32005 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32007 /* xa = copysign (xa, operand1) */
32008 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32010 /* generate 1.0 or -1.0 */
32011 one = force_reg (mode,
32012 const_double_from_real_value (do_floor
32013 ? dconst1 : dconstm1, mode));
32015 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32016 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32017 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32018 gen_rtx_AND (mode, one, tmp)));
32019 /* We always need to subtract here to preserve signed zero. */
32020 tmp = expand_simple_binop (mode, MINUS,
32021 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32022 emit_move_insn (res, tmp);
32024 emit_label (label);
32025 LABEL_NUSES (label) = 1;
32027 emit_move_insn (operand0, res);
32030 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32031 into OPERAND0. */
32032 void
32033 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
32035 /* C code for the stuff we expand below.
32036 double xa = fabs (x), x2;
32037 if (!isless (xa, TWO52))
32038 return x;
32039 x2 = (double)(long)x;
32040 Compensate. Floor:
32041 if (x2 > x)
32042 x2 -= 1;
32043 Compensate. Ceil:
32044 if (x2 < x)
32045 x2 += 1;
32046 if (HONOR_SIGNED_ZEROS (mode))
32047 return copysign (x2, x);
32048 return x2;
32050 enum machine_mode mode = GET_MODE (operand0);
32051 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32053 TWO52 = ix86_gen_TWO52 (mode);
32055 /* Temporary for holding the result, initialized to the input
32056 operand to ease control flow. */
32057 res = gen_reg_rtx (mode);
32058 emit_move_insn (res, operand1);
32060 /* xa = abs (operand1) */
32061 xa = ix86_expand_sse_fabs (res, &mask);
32063 /* if (!isless (xa, TWO52)) goto label; */
32064 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32066 /* xa = (double)(long)x */
32067 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32068 expand_fix (xi, res, 0);
32069 expand_float (xa, xi, 0);
32071 /* generate 1.0 */
32072 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32074 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32075 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32076 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32077 gen_rtx_AND (mode, one, tmp)));
32078 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32079 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32080 emit_move_insn (res, tmp);
32082 if (HONOR_SIGNED_ZEROS (mode))
32083 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32085 emit_label (label);
32086 LABEL_NUSES (label) = 1;
32088 emit_move_insn (operand0, res);
32091 /* Expand SSE sequence for computing round from OPERAND1 storing
32092 into OPERAND0. Sequence that works without relying on DImode truncation
32093 via cvttsd2siq that is only available on 64bit targets. */
32094 void
32095 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32097 /* C code for the stuff we expand below.
32098 double xa = fabs (x), xa2, x2;
32099 if (!isless (xa, TWO52))
32100 return x;
32101 Using the absolute value and copying back sign makes
32102 -0.0 -> -0.0 correct.
32103 xa2 = xa + TWO52 - TWO52;
32104 Compensate.
32105 dxa = xa2 - xa;
32106 if (dxa <= -0.5)
32107 xa2 += 1;
32108 else if (dxa > 0.5)
32109 xa2 -= 1;
32110 x2 = copysign (xa2, x);
32111 return x2;
32113 enum machine_mode mode = GET_MODE (operand0);
32114 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32116 TWO52 = ix86_gen_TWO52 (mode);
32118 /* Temporary for holding the result, initialized to the input
32119 operand to ease control flow. */
32120 res = gen_reg_rtx (mode);
32121 emit_move_insn (res, operand1);
32123 /* xa = abs (operand1) */
32124 xa = ix86_expand_sse_fabs (res, &mask);
32126 /* if (!isless (xa, TWO52)) goto label; */
32127 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32129 /* xa2 = xa + TWO52 - TWO52; */
32130 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32131 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32133 /* dxa = xa2 - xa; */
32134 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32136 /* generate 0.5, 1.0 and -0.5 */
32137 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32138 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32139 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32140 0, OPTAB_DIRECT);
32142 /* Compensate. */
32143 tmp = gen_reg_rtx (mode);
32144 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32145 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32146 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32147 gen_rtx_AND (mode, one, tmp)));
32148 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32149 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32150 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32151 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32152 gen_rtx_AND (mode, one, tmp)));
32153 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32155 /* res = copysign (xa2, operand1) */
32156 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32158 emit_label (label);
32159 LABEL_NUSES (label) = 1;
32161 emit_move_insn (operand0, res);
32164 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32165 into OPERAND0. */
32166 void
32167 ix86_expand_trunc (rtx operand0, rtx operand1)
32169 /* C code for SSE variant we expand below.
32170 double xa = fabs (x), x2;
32171 if (!isless (xa, TWO52))
32172 return x;
32173 x2 = (double)(long)x;
32174 if (HONOR_SIGNED_ZEROS (mode))
32175 return copysign (x2, x);
32176 return x2;
32178 enum machine_mode mode = GET_MODE (operand0);
32179 rtx xa, xi, TWO52, label, res, mask;
32181 TWO52 = ix86_gen_TWO52 (mode);
32183 /* Temporary for holding the result, initialized to the input
32184 operand to ease control flow. */
32185 res = gen_reg_rtx (mode);
32186 emit_move_insn (res, operand1);
32188 /* xa = abs (operand1) */
32189 xa = ix86_expand_sse_fabs (res, &mask);
32191 /* if (!isless (xa, TWO52)) goto label; */
32192 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32194 /* x = (double)(long)x */
32195 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32196 expand_fix (xi, res, 0);
32197 expand_float (res, xi, 0);
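/* As an illustrative sketch for DFmode: with x = -3.7 the DImode fix above
   yields -3 and converting back gives -3.0.  The earlier TWO52 branch
   guarantees |x| < 2^52, so the conversion cannot overflow the integer mode
   and the result is exact.  */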
32199 if (HONOR_SIGNED_ZEROS (mode))
32200 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32202 emit_label (label);
32203 LABEL_NUSES (label) = 1;
32205 emit_move_insn (operand0, res);
32208 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32209 into OPERAND0. */
32210 void
32211 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32213 enum machine_mode mode = GET_MODE (operand0);
32214 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32216 /* C code for the SSE variant we expand below.
32217 double xa = fabs (x), x2;
32218 if (!isless (xa, TWO52))
32219 return x;
32220 xa2 = xa + TWO52 - TWO52;
32221 Compensate:
32222 if (xa2 > xa)
32223 xa2 -= 1.0;
32224 x2 = copysign (xa2, x);
32225 return x2;
32228 TWO52 = ix86_gen_TWO52 (mode);
32230 /* Temporary for holding the result, initialized to the input
32231 operand to ease control flow. */
32232 res = gen_reg_rtx (mode);
32233 emit_move_insn (res, operand1);
32235 /* xa = abs (operand1) */
32236 xa = ix86_expand_sse_fabs (res, &smask);
32238 /* if (!isless (xa, TWO52)) goto label; */
32239 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32241 /* res = xa + TWO52 - TWO52; */
32242 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32243 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32244 emit_move_insn (res, tmp);
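/* A worked example, assuming the default round-to-nearest mode: for xa = 3.7,
   adding TWO52 (2^52) leaves only integers representable at that magnitude,
   so 3.7 + 2^52 rounds to 4.0 + 2^52 and res becomes 4.0 after the
   subtraction.  Since 4.0 > 3.7, the compensation below subtracts 1.0,
   giving trunc (3.7) = 3.0.  */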
32246 /* generate 1.0 */
32247 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32249 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32250 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32251 emit_insn (gen_rtx_SET (VOIDmode, mask,
32252 gen_rtx_AND (mode, mask, one)));
32253 tmp = expand_simple_binop (mode, MINUS,
32254 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32255 emit_move_insn (res, tmp);
32257 /* res = copysign (res, operand1) */
32258 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32260 emit_label (label);
32261 LABEL_NUSES (label) = 1;
32263 emit_move_insn (operand0, res);
32266 /* Expand SSE sequence for computing round from OPERAND1 storing
32267 into OPERAND0. */
32268 void
32269 ix86_expand_round (rtx operand0, rtx operand1)
32271 /* C code for the stuff we're doing below:
32272 double xa = fabs (x);
32273 if (!isless (xa, TWO52))
32274 return x;
32275 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32276 return copysign (xa, x);
32278 enum machine_mode mode = GET_MODE (operand0);
32279 rtx res, TWO52, xa, label, xi, half, mask;
32280 const struct real_format *fmt;
32281 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32283 /* Temporary for holding the result, initialized to the input
32284 operand to ease control flow. */
32285 res = gen_reg_rtx (mode);
32286 emit_move_insn (res, operand1);
32288 TWO52 = ix86_gen_TWO52 (mode);
32289 xa = ix86_expand_sse_fabs (res, &mask);
32290 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32292 /* load nextafter (0.5, 0.0) */
32293 fmt = REAL_MODE_FORMAT (mode);
32294 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32295 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
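/* Using the predecessor of 0.5 rather than 0.5 itself guards against inputs
   just below one half: e.g. for DFmode x = 0.5 - 2^-54, adding exactly 0.5
   would round up to 1.0 and the truncation below would return 1.0 instead of
   0.0, while adding pred (0.5) = 0.5 - 2^-54 keeps the sum below 1.0.  */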
32297 /* xa = xa + 0.5 */
32298 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32299 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32301 /* xa = (double)(int64_t)xa */
32302 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32303 expand_fix (xi, xa, 0);
32304 expand_float (xa, xi, 0);
32306 /* res = copysign (xa, operand1) */
32307 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32309 emit_label (label);
32310 LABEL_NUSES (label) = 1;
32312 emit_move_insn (operand0, res);
32316 /* Table of valid machine attributes. */
32317 static const struct attribute_spec ix86_attribute_table[] =
32319 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
32320 /* Stdcall attribute says callee is responsible for popping arguments
32321 if they are not variable. */
32322 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32323 /* Fastcall attribute says callee is responsible for popping arguments
32324 if they are not variable. */
32325 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32326 /* Thiscall attribute says callee is responsible for popping arguments
32327 if they are not variable. */
32328 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32329 /* Cdecl attribute says the callee is a normal C declaration */
32330 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32331 /* Regparm attribute specifies how many integer arguments are to be
32332 passed in registers. */
32333 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
32334 /* Sseregparm attribute says we are using x86_64 calling conventions
32335 for FP arguments. */
32336 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32337 /* force_align_arg_pointer says this function realigns the stack at entry. */
32338 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32339 false, true, true, ix86_handle_cconv_attribute },
32340 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32341 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
32342 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
32343 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
32344 #endif
32345 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
32346 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
32347 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32348 SUBTARGET_ATTRIBUTE_TABLE,
32349 #endif
32350 /* ms_abi and sysv_abi calling convention function attributes. */
32351 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
32352 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
32353 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
32354 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32355 ix86_handle_callee_pop_aggregate_return },
32356 /* End element. */
32357 { NULL, 0, 0, false, false, false, NULL }
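/* For reference, these attributes are applied to declarations using the
   usual GNU attribute syntax, for example (illustrative prototypes only):
     int  __attribute__ ((regparm (3))) f (int a, int b, int c);
     void __attribute__ ((fastcall)) g (int x);
     void __attribute__ ((ms_abi)) h (void);  */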
32360 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32361 static int
32362 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32363 tree vectype ATTRIBUTE_UNUSED,
32364 int misalign ATTRIBUTE_UNUSED)
32366 switch (type_of_cost)
32368 case scalar_stmt:
32369 return ix86_cost->scalar_stmt_cost;
32371 case scalar_load:
32372 return ix86_cost->scalar_load_cost;
32374 case scalar_store:
32375 return ix86_cost->scalar_store_cost;
32377 case vector_stmt:
32378 return ix86_cost->vec_stmt_cost;
32380 case vector_load:
32381 return ix86_cost->vec_align_load_cost;
32383 case vector_store:
32384 return ix86_cost->vec_store_cost;
32386 case vec_to_scalar:
32387 return ix86_cost->vec_to_scalar_cost;
32389 case scalar_to_vec:
32390 return ix86_cost->scalar_to_vec_cost;
32392 case unaligned_load:
32393 case unaligned_store:
32394 return ix86_cost->vec_unalign_load_cost;
32396 case cond_branch_taken:
32397 return ix86_cost->cond_taken_branch_cost;
32399 case cond_branch_not_taken:
32400 return ix86_cost->cond_not_taken_branch_cost;
32402 case vec_perm:
32403 return 1;
32405 default:
32406 gcc_unreachable ();
32411 /* Implement targetm.vectorize.builtin_vec_perm. */
32413 static tree
32414 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32416 tree itype = TREE_TYPE (vec_type);
32417 bool u = TYPE_UNSIGNED (itype);
32418 enum machine_mode vmode = TYPE_MODE (vec_type);
32419 enum ix86_builtins fcode;
32420 bool ok = TARGET_SSE2;
32422 switch (vmode)
32424 case V4DFmode:
32425 ok = TARGET_AVX;
32426 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32427 goto get_di;
32428 case V2DFmode:
32429 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32430 get_di:
32431 itype = ix86_get_builtin_type (IX86_BT_DI);
32432 break;
32434 case V8SFmode:
32435 ok = TARGET_AVX;
32436 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32437 goto get_si;
32438 case V4SFmode:
32439 ok = TARGET_SSE;
32440 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32441 get_si:
32442 itype = ix86_get_builtin_type (IX86_BT_SI);
32443 break;
32445 case V2DImode:
32446 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32447 break;
32448 case V4SImode:
32449 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32450 break;
32451 case V8HImode:
32452 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32453 break;
32454 case V16QImode:
32455 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32456 break;
32457 default:
32458 ok = false;
32459 break;
32462 if (!ok)
32463 return NULL_TREE;
32465 *mask_type = itype;
32466 return ix86_builtins[(int) fcode];
32469 /* Return a vector mode with twice as many elements as VMODE. */
32470 /* ??? Consider moving this to a table generated by genmodes.c. */
32472 static enum machine_mode
32473 doublesize_vector_mode (enum machine_mode vmode)
32475 switch (vmode)
32477 case V2SFmode: return V4SFmode;
32478 case V1DImode: return V2DImode;
32479 case V2SImode: return V4SImode;
32480 case V4HImode: return V8HImode;
32481 case V8QImode: return V16QImode;
32483 case V2DFmode: return V4DFmode;
32484 case V4SFmode: return V8SFmode;
32485 case V2DImode: return V4DImode;
32486 case V4SImode: return V8SImode;
32487 case V8HImode: return V16HImode;
32488 case V16QImode: return V32QImode;
32490 case V4DFmode: return V8DFmode;
32491 case V8SFmode: return V16SFmode;
32492 case V4DImode: return V8DImode;
32493 case V8SImode: return V16SImode;
32494 case V16HImode: return V32HImode;
32495 case V32QImode: return V64QImode;
32497 default:
32498 gcc_unreachable ();
32502 /* Construct (set target (vec_select op0 (parallel perm))) and
32503 return true if that's a valid instruction in the active ISA. */
32505 static bool
32506 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
32508 rtx rperm[MAX_VECT_LEN], x;
32509 unsigned i;
32511 for (i = 0; i < nelt; ++i)
32512 rperm[i] = GEN_INT (perm[i]);
32514 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
32515 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
32516 x = gen_rtx_SET (VOIDmode, target, x);
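/* For instance, reversing a V4SF vector with perm = {3, 2, 1, 0} builds
     (set (reg:V4SF target)
          (vec_select:V4SF (reg:V4SF op0)
                           (parallel [(const_int 3) (const_int 2)
                                      (const_int 1) (const_int 0)])))
   which is kept only if recog_memoized finds a matching shuffle pattern.  */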
32518 x = emit_insn (x);
32519 if (recog_memoized (x) < 0)
32521 remove_insn (x);
32522 return false;
32524 return true;
32527 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32529 static bool
32530 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
32531 const unsigned char *perm, unsigned nelt)
32533 enum machine_mode v2mode;
32534 rtx x;
32536 v2mode = doublesize_vector_mode (GET_MODE (op0));
32537 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
32538 return expand_vselect (target, x, perm, nelt);
32541 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32542 in terms of blendp[sd] / pblendw / pblendvb. */
32544 static bool
32545 expand_vec_perm_blend (struct expand_vec_perm_d *d)
32547 enum machine_mode vmode = d->vmode;
32548 unsigned i, mask, nelt = d->nelt;
32549 rtx target, op0, op1, x;
32551 if (!TARGET_SSE4_1 || d->op0 == d->op1)
32552 return false;
32553 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
32554 return false;
32556 /* This is a blend, not a permute. Elements must stay in their
32557 respective lanes. */
32558 for (i = 0; i < nelt; ++i)
32560 unsigned e = d->perm[i];
32561 if (!(e == i || e == i + nelt))
32562 return false;
32565 if (d->testing_p)
32566 return true;
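/* For example, with V4SF a selector {0, 5, 2, 7} passes the check above
   (element i is taken from lane i of one operand or the other), whereas
   {1, 5, 2, 7} moves an element across lanes and is rejected here.  */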
32568 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
32569 decision should be extracted elsewhere, so that we only try that
32570 sequence once all budget==3 options have been tried. */
32572 /* For bytes, see if bytes move in pairs so we can use pblendw with
32573 an immediate argument, rather than pblendvb with a vector argument. */
32574 if (vmode == V16QImode)
32576 bool pblendw_ok = true;
32577 for (i = 0; i < 16 && pblendw_ok; i += 2)
32578 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
32580 if (!pblendw_ok)
32582 rtx rperm[16], vperm;
32584 for (i = 0; i < nelt; ++i)
32585 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
32587 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32588 vperm = force_reg (V16QImode, vperm);
32590 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
32591 return true;
32595 target = d->target;
32596 op0 = d->op0;
32597 op1 = d->op1;
32598 mask = 0;
32600 switch (vmode)
32602 case V4DFmode:
32603 case V8SFmode:
32604 case V2DFmode:
32605 case V4SFmode:
32606 case V8HImode:
32607 for (i = 0; i < nelt; ++i)
32608 mask |= (d->perm[i] >= nelt) << i;
32609 break;
32611 case V2DImode:
32612 for (i = 0; i < 2; ++i)
32613 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
32614 goto do_subreg;
32616 case V4SImode:
32617 for (i = 0; i < 4; ++i)
32618 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
32619 goto do_subreg;
32621 case V16QImode:
32622 for (i = 0; i < 8; ++i)
32623 mask |= (d->perm[i * 2] >= 16) << i;
32625 do_subreg:
32626 vmode = V8HImode;
32627 target = gen_lowpart (vmode, target);
32628 op0 = gen_lowpart (vmode, op0);
32629 op1 = gen_lowpart (vmode, op1);
32630 break;
32632 default:
32633 gcc_unreachable ();
32636 /* This matches five different patterns with the different modes. */
32637 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
32638 x = gen_rtx_SET (VOIDmode, target, x);
32639 emit_insn (x);
32641 return true;
32644 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32645 in terms of the variable form of vpermilps.
32647 Note that we will have already failed the immediate input vpermilps,
32648 which requires that the high and low part shuffle be identical; the
32649 variable form doesn't require that. */
32651 static bool
32652 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
32654 rtx rperm[8], vperm;
32655 unsigned i;
32657 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
32658 return false;
32660 /* We can only permute within the 128-bit lane. */
32661 for (i = 0; i < 8; ++i)
32663 unsigned e = d->perm[i];
32664 if (i < 4 ? e >= 4 : e < 4)
32665 return false;
32668 if (d->testing_p)
32669 return true;
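/* For example, {3, 2, 1, 0, 7, 6, 5, 4} reverses each 128-bit lane and is
   accepted, while a cross-lane selector such as {4, 5, 6, 7, 0, 1, 2, 3}
   fails the check above.  */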
32671 for (i = 0; i < 8; ++i)
32673 unsigned e = d->perm[i];
32675 /* Within each 128-bit lane, the elements of op0 are numbered
32676 from 0 and the elements of op1 are numbered from 4. */
32677 if (e >= 8 + 4)
32678 e -= 8;
32679 else if (e >= 4)
32680 e -= 4;
32682 rperm[i] = GEN_INT (e);
32685 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
32686 vperm = force_reg (V8SImode, vperm);
32687 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
32689 return true;
32692 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32693 in terms of pshufb or vpperm. */
32695 static bool
32696 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
32698 unsigned i, nelt, eltsz;
32699 rtx rperm[16], vperm, target, op0, op1;
32701 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
32702 return false;
32703 if (GET_MODE_SIZE (d->vmode) != 16)
32704 return false;
32706 if (d->testing_p)
32707 return true;
32709 nelt = d->nelt;
32710 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
32712 for (i = 0; i < nelt; ++i)
32714 unsigned j, e = d->perm[i];
32715 for (j = 0; j < eltsz; ++j)
32716 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
32719 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32720 vperm = force_reg (V16QImode, vperm);
32722 target = gen_lowpart (V16QImode, d->target);
32723 op0 = gen_lowpart (V16QImode, d->op0);
32724 if (d->op0 == d->op1)
32725 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
32726 else
32728 op1 = gen_lowpart (V16QImode, d->op1);
32729 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
32732 return true;
32735 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
32736 in a single instruction. */
32738 static bool
32739 expand_vec_perm_1 (struct expand_vec_perm_d *d)
32741 unsigned i, nelt = d->nelt;
32742 unsigned char perm2[MAX_VECT_LEN];
32744 /* Check plain VEC_SELECT first, because AVX has instructions that could
32745 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
32746 input where SEL+CONCAT may not. */
32747 if (d->op0 == d->op1)
32749 int mask = nelt - 1;
32751 for (i = 0; i < nelt; i++)
32752 perm2[i] = d->perm[i] & mask;
32754 if (expand_vselect (d->target, d->op0, perm2, nelt))
32755 return true;
32757 /* There are plenty of patterns in sse.md that are written for
32758 SEL+CONCAT and are not replicated for a single op. Perhaps
32759 that should be changed, to avoid the nastiness here. */
32761 /* Recognize interleave style patterns, which means incrementing
32762 every other permutation operand. */
32763 for (i = 0; i < nelt; i += 2)
32765 perm2[i] = d->perm[i] & mask;
32766 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
32768 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32769 return true;
32771 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
32772 if (nelt >= 4)
32774 for (i = 0; i < nelt; i += 4)
32776 perm2[i + 0] = d->perm[i + 0] & mask;
32777 perm2[i + 1] = d->perm[i + 1] & mask;
32778 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
32779 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
32782 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32783 return true;
32787 /* Finally, try the fully general two operand permute. */
32788 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
32789 return true;
32791 /* Recognize interleave style patterns with reversed operands. */
32792 if (d->op0 != d->op1)
32794 for (i = 0; i < nelt; ++i)
32796 unsigned e = d->perm[i];
32797 if (e >= nelt)
32798 e -= nelt;
32799 else
32800 e += nelt;
32801 perm2[i] = e;
32804 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
32805 return true;
32808 /* Try the SSE4.1 blend variable merge instructions. */
32809 if (expand_vec_perm_blend (d))
32810 return true;
32812 /* Try one of the AVX vpermil variable permutations. */
32813 if (expand_vec_perm_vpermil (d))
32814 return true;
32816 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
32817 if (expand_vec_perm_pshufb (d))
32818 return true;
32820 return false;
32823 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32824 in terms of a pair of pshuflw + pshufhw instructions. */
32826 static bool
32827 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
32829 unsigned char perm2[MAX_VECT_LEN];
32830 unsigned i;
32831 bool ok;
32833 if (d->vmode != V8HImode || d->op0 != d->op1)
32834 return false;
32836 /* The two permutations only operate in 64-bit lanes. */
32837 for (i = 0; i < 4; ++i)
32838 if (d->perm[i] >= 4)
32839 return false;
32840 for (i = 4; i < 8; ++i)
32841 if (d->perm[i] < 4)
32842 return false;
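/* For example, {2, 0, 3, 1, 7, 5, 6, 4} keeps elements 0-3 within the low
   quadword and 4-7 within the high quadword, so it can be expanded as a
   pshuflw followed by a pshufhw as done below.  */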
32844 if (d->testing_p)
32845 return true;
32847 /* Emit the pshuflw. */
32848 memcpy (perm2, d->perm, 4);
32849 for (i = 4; i < 8; ++i)
32850 perm2[i] = i;
32851 ok = expand_vselect (d->target, d->op0, perm2, 8);
32852 gcc_assert (ok);
32854 /* Emit the pshufhw. */
32855 memcpy (perm2 + 4, d->perm + 4, 4);
32856 for (i = 0; i < 4; ++i)
32857 perm2[i] = i;
32858 ok = expand_vselect (d->target, d->target, perm2, 8);
32859 gcc_assert (ok);
32861 return true;
32864 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32865 the permutation using the SSSE3 palignr instruction. This succeeds
32866 when all of the elements in PERM fit within one vector and we merely
32867 need to shift them down so that a single vector permutation has a
32868 chance to succeed. */
32870 static bool
32871 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
32873 unsigned i, nelt = d->nelt;
32874 unsigned min, max;
32875 bool in_order, ok;
32876 rtx shift;
32878 /* Even with AVX, palignr only operates on 128-bit vectors. */
32879 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
32880 return false;
32882 min = nelt, max = 0;
32883 for (i = 0; i < nelt; ++i)
32885 unsigned e = d->perm[i];
32886 if (e < min)
32887 min = e;
32888 if (e > max)
32889 max = e;
32891 if (min == 0 || max - min >= nelt)
32892 return false;
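/* For example, in V4SImode the selector {3, 4, 5, 6} has min = 3 and max = 6,
   so it survives the test above: the palignr below shifts the concatenated
   operands down by three elements, after which the residual permutation
   {0, 1, 2, 3} is already in order.  */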
32894 /* Given that we have SSSE3, we know we'll be able to implement the
32895 single operand permutation after the palignr with pshufb. */
32896 if (d->testing_p)
32897 return true;
32899 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
32900 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
32901 gen_lowpart (TImode, d->op1),
32902 gen_lowpart (TImode, d->op0), shift));
32904 d->op0 = d->op1 = d->target;
32906 in_order = true;
32907 for (i = 0; i < nelt; ++i)
32909 unsigned e = d->perm[i] - min;
32910 if (e != i)
32911 in_order = false;
32912 d->perm[i] = e;
32915 /* Test for the degenerate case where the alignment by itself
32916 produces the desired permutation. */
32917 if (in_order)
32918 return true;
32920 ok = expand_vec_perm_1 (d);
32921 gcc_assert (ok);
32923 return ok;
32926 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32927 a two vector permutation into a single vector permutation by using
32928 an interleave operation to merge the vectors. */
32930 static bool
32931 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
32933 struct expand_vec_perm_d dremap, dfinal;
32934 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
32935 unsigned contents, h1, h2, h3, h4;
32936 unsigned char remap[2 * MAX_VECT_LEN];
32937 rtx seq;
32938 bool ok;
32940 if (d->op0 == d->op1)
32941 return false;
32943 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
32944 lanes. We can use similar techniques with the vperm2f128 instruction,
32945 but it requires slightly different logic. */
32946 if (GET_MODE_SIZE (d->vmode) != 16)
32947 return false;
32949 /* Examine from whence the elements come. */
32950 contents = 0;
32951 for (i = 0; i < nelt; ++i)
32952 contents |= 1u << d->perm[i];
32954 /* Split the two input vectors into 4 halves. */
32955 h1 = (1u << nelt2) - 1;
32956 h2 = h1 << nelt2;
32957 h3 = h2 << nelt2;
32958 h4 = h3 << nelt2;
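/* For example, in V4SImode (nelt = 4, nelt2 = 2) this yields h1 = 0x03,
   h2 = 0x0c, h3 = 0x30 and h4 = 0xc0: the low and high halves of op0
   followed by the low and high halves of op1.  */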
32960 memset (remap, 0xff, sizeof (remap));
32961 dremap = *d;
32963 /* If the elements all come from the low halves, use interleave low; similarly,
32964    use interleave high when they all come from the high halves. If the elements
32965    come from mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
32966 if ((contents & (h1 | h3)) == contents)
32968 for (i = 0; i < nelt2; ++i)
32970 remap[i] = i * 2;
32971 remap[i + nelt] = i * 2 + 1;
32972 dremap.perm[i * 2] = i;
32973 dremap.perm[i * 2 + 1] = i + nelt;
32976 else if ((contents & (h2 | h4)) == contents)
32978 for (i = 0; i < nelt2; ++i)
32980 remap[i + nelt2] = i * 2;
32981 remap[i + nelt + nelt2] = i * 2 + 1;
32982 dremap.perm[i * 2] = i + nelt2;
32983 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
32986 else if ((contents & (h1 | h4)) == contents)
32988 for (i = 0; i < nelt2; ++i)
32990 remap[i] = i;
32991 remap[i + nelt + nelt2] = i + nelt2;
32992 dremap.perm[i] = i;
32993 dremap.perm[i + nelt2] = i + nelt + nelt2;
32995 if (nelt != 4)
32997 dremap.vmode = V2DImode;
32998 dremap.nelt = 2;
32999 dremap.perm[0] = 0;
33000 dremap.perm[1] = 3;
33003 else if ((contents & (h2 | h3)) == contents)
33005 for (i = 0; i < nelt2; ++i)
33007 remap[i + nelt2] = i;
33008 remap[i + nelt] = i + nelt2;
33009 dremap.perm[i] = i + nelt2;
33010 dremap.perm[i + nelt2] = i + nelt;
33012 if (nelt != 4)
33014 dremap.vmode = V2DImode;
33015 dremap.nelt = 2;
33016 dremap.perm[0] = 1;
33017 dremap.perm[1] = 2;
33020 else
33021 return false;
33023 /* Use the remapping array set up above to move the elements from their
33024 swizzled locations into their final destinations. */
33025 dfinal = *d;
33026 for (i = 0; i < nelt; ++i)
33028 unsigned e = remap[d->perm[i]];
33029 gcc_assert (e < nelt);
33030 dfinal.perm[i] = e;
33032 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
33033 dfinal.op1 = dfinal.op0;
33034 dremap.target = dfinal.op0;
33036 /* Test if the final remap can be done with a single insn. For V4SFmode or
33037 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33038 start_sequence ();
33039 ok = expand_vec_perm_1 (&dfinal);
33040 seq = get_insns ();
33041 end_sequence ();
33043 if (!ok)
33044 return false;
33046 if (dremap.vmode != dfinal.vmode)
33048 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33049 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33050 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33053 ok = expand_vec_perm_1 (&dremap);
33054 gcc_assert (ok);
33056 emit_insn (seq);
33057 return true;
33060 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33061 permutation with two pshufb insns and an ior. We should have already
33062 failed all two instruction sequences. */
33064 static bool
33065 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33067 rtx rperm[2][16], vperm, l, h, op, m128;
33068 unsigned int i, nelt, eltsz;
33070 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33071 return false;
33072 gcc_assert (d->op0 != d->op1);
33074 nelt = d->nelt;
33075 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33077 /* Generate two permutation masks. If the required element is within
33078 the given vector it is shuffled into the proper lane. If the required
33079 element is in the other vector, force a zero into the lane by setting
33080 bit 7 in the permutation mask. */
33081 m128 = GEN_INT (-128);
33082 for (i = 0; i < nelt; ++i)
33084 unsigned j, e = d->perm[i];
33085 unsigned which = (e >= nelt);
33086 if (e >= nelt)
33087 e -= nelt;
33089 for (j = 0; j < eltsz; ++j)
33091 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33092 rperm[1-which][i*eltsz + j] = m128;
33096 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33097 vperm = force_reg (V16QImode, vperm);
33099 l = gen_reg_rtx (V16QImode);
33100 op = gen_lowpart (V16QImode, d->op0);
33101 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33103 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33104 vperm = force_reg (V16QImode, vperm);
33106 h = gen_reg_rtx (V16QImode);
33107 op = gen_lowpart (V16QImode, d->op1);
33108 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33110 op = gen_lowpart (V16QImode, d->target);
33111 emit_insn (gen_iorv16qi3 (op, l, h));
33113 return true;
33116 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33117 and extract-odd permutations. */
33119 static bool
33120 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33122 rtx t1, t2, t3;
33124 switch (d->vmode)
33126 case V4DFmode:
33127 t1 = gen_reg_rtx (V4DFmode);
33128 t2 = gen_reg_rtx (V4DFmode);
33130 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33131 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33132 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33134 /* Now an unpck[lh]pd will produce the result required. */
33135 if (odd)
33136 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33137 else
33138 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33139 emit_insn (t3);
33140 break;
33142 case V8SFmode:
33144 int mask = odd ? 0xdd : 0x88;
33146 t1 = gen_reg_rtx (V8SFmode);
33147 t2 = gen_reg_rtx (V8SFmode);
33148 t3 = gen_reg_rtx (V8SFmode);
33150 /* Shuffle within the 128-bit lanes to produce:
33151 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33152 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33153 GEN_INT (mask)));
33155 /* Shuffle the lanes around to produce:
33156 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33157 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33158 GEN_INT (0x3)));
33160 /* Shuffle within the 128-bit lanes to produce:
33161 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33162 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33164 /* Shuffle within the 128-bit lanes to produce:
33165 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33166 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33168 /* Shuffle the lanes around to produce:
33169 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33170 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33171 GEN_INT (0x20)));
33173 break;
33175 case V2DFmode:
33176 case V4SFmode:
33177 case V2DImode:
33178 case V4SImode:
33179 /* These are always directly implementable by expand_vec_perm_1. */
33180 gcc_unreachable ();
33182 case V8HImode:
33183 if (TARGET_SSSE3)
33184 return expand_vec_perm_pshufb2 (d);
33185 else
33187 /* We need 2*log2(N)-1 operations to achieve odd/even
33188 with interleave. */
33189 t1 = gen_reg_rtx (V8HImode);
33190 t2 = gen_reg_rtx (V8HImode);
33191 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33192 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33193 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33194 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33195 if (odd)
33196 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33197 else
33198 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33199 emit_insn (t3);
33201 break;
33203 case V16QImode:
33204 if (TARGET_SSSE3)
33205 return expand_vec_perm_pshufb2 (d);
33206 else
33208 t1 = gen_reg_rtx (V16QImode);
33209 t2 = gen_reg_rtx (V16QImode);
33210 t3 = gen_reg_rtx (V16QImode);
33211 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33212 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33213 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33214 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33215 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33216 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33217 if (odd)
33218 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33219 else
33220 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33221 emit_insn (t3);
33223 break;
33225 default:
33226 gcc_unreachable ();
33229 return true;
33232 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33233 extract-even and extract-odd permutations. */
33235 static bool
33236 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33238 unsigned i, odd, nelt = d->nelt;
33240 odd = d->perm[0];
33241 if (odd != 0 && odd != 1)
33242 return false;
33244 for (i = 1; i < nelt; ++i)
33245 if (d->perm[i] != 2 * i + odd)
33246 return false;
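/* For example, in V4SImode the selector {0, 2, 4, 6} is the extract-even
   permutation (odd == 0) and {1, 3, 5, 7} the extract-odd one.  */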
33248 return expand_vec_perm_even_odd_1 (d, odd);
33251 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33252 permutations. We assume that expand_vec_perm_1 has already failed. */
33254 static bool
33255 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33257 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33258 enum machine_mode vmode = d->vmode;
33259 unsigned char perm2[4];
33260 rtx op0 = d->op0;
33261 bool ok;
33263 switch (vmode)
33265 case V4DFmode:
33266 case V8SFmode:
33267 /* These are special-cased in sse.md so that we can optionally
33268 use the vbroadcast instruction. They expand to two insns
33269 if the input happens to be in a register. */
33270 gcc_unreachable ();
33272 case V2DFmode:
33273 case V2DImode:
33274 case V4SFmode:
33275 case V4SImode:
33276 /* These are always implementable using standard shuffle patterns. */
33277 gcc_unreachable ();
33279 case V8HImode:
33280 case V16QImode:
33281 /* These can be implemented via interleave. We save one insn by
33282 stopping once we have promoted to V4SImode and then use pshufd. */
33285 optab otab = vec_interleave_low_optab;
33287 if (elt >= nelt2)
33289 otab = vec_interleave_high_optab;
33290 elt -= nelt2;
33292 nelt2 /= 2;
33294 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33295 vmode = get_mode_wider_vector (vmode);
33296 op0 = gen_lowpart (vmode, op0);
33298 while (vmode != V4SImode);
33300 memset (perm2, elt, 4);
33301 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33302 gcc_assert (ok);
33303 return true;
33305 default:
33306 gcc_unreachable ();
33310 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33311 broadcast permutations. */
33313 static bool
33314 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33316 unsigned i, elt, nelt = d->nelt;
33318 if (d->op0 != d->op1)
33319 return false;
33321 elt = d->perm[0];
33322 for (i = 1; i < nelt; ++i)
33323 if (d->perm[i] != elt)
33324 return false;
33326 return expand_vec_perm_broadcast_1 (d);
33329 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33330 With all of the interface bits taken care of, perform the expansion
33331 in D and return true on success. */
33333 static bool
33334 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33336 /* Try a single instruction expansion. */
33337 if (expand_vec_perm_1 (d))
33338 return true;
33340 /* Try sequences of two instructions. */
33342 if (expand_vec_perm_pshuflw_pshufhw (d))
33343 return true;
33345 if (expand_vec_perm_palignr (d))
33346 return true;
33348 if (expand_vec_perm_interleave2 (d))
33349 return true;
33351 if (expand_vec_perm_broadcast (d))
33352 return true;
33354 /* Try sequences of three instructions. */
33356 if (expand_vec_perm_pshufb2 (d))
33357 return true;
33359 /* ??? Look for narrow permutations whose element orderings would
33360 allow the promotion to a wider mode. */
33362 /* ??? Look for sequences of interleave or a wider permute that place
33363 the data into the correct lanes for a half-vector shuffle like
33364 pshuf[lh]w or vpermilps. */
33366 /* ??? Look for sequences of interleave that produce the desired results.
33367 The combinatorics of punpck[lh] get pretty ugly... */
33369 if (expand_vec_perm_even_odd (d))
33370 return true;
33372 return false;
33375 /* Extract the values from the vector CST into the permutation array in D.
33376 Return 0 on error, 1 if all values from the permutation come from the
33377 first vector, 2 if all values from the second vector, and 3 otherwise. */
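/* For example, with nelt = 4 a constant selector {0, 1, 4, 5} draws from both
   vectors and yields 3, while {5, 6, 4, 7} draws only from the second vector,
   yields 2, and is folded below to the first-operand indices {1, 2, 0, 3}.  */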
33379 static int
33380 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33382 tree list = TREE_VECTOR_CST_ELTS (cst);
33383 unsigned i, nelt = d->nelt;
33384 int ret = 0;
33386 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33388 unsigned HOST_WIDE_INT e;
33390 if (!host_integerp (TREE_VALUE (list), 1))
33391 return 0;
33392 e = tree_low_cst (TREE_VALUE (list), 1);
33393 if (e >= 2 * nelt)
33394 return 0;
33396 ret |= (e < nelt ? 1 : 2);
33397 d->perm[i] = e;
33399 gcc_assert (list == NULL);
33401 /* For all elements from second vector, fold the elements to first. */
33402 if (ret == 2)
33403 for (i = 0; i < nelt; ++i)
33404 d->perm[i] -= nelt;
33406 return ret;
33409 static rtx
33410 ix86_expand_vec_perm_builtin (tree exp)
33412 struct expand_vec_perm_d d;
33413 tree arg0, arg1, arg2;
33415 arg0 = CALL_EXPR_ARG (exp, 0);
33416 arg1 = CALL_EXPR_ARG (exp, 1);
33417 arg2 = CALL_EXPR_ARG (exp, 2);
33419 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33420 d.nelt = GET_MODE_NUNITS (d.vmode);
33421 d.testing_p = false;
33422 gcc_assert (VECTOR_MODE_P (d.vmode));
33424 if (TREE_CODE (arg2) != VECTOR_CST)
33426 error_at (EXPR_LOCATION (exp),
33427 "vector permutation requires vector constant");
33428 goto exit_error;
33431 switch (extract_vec_perm_cst (&d, arg2))
33433 default:
33434 gcc_unreachable();
33436 case 0:
33437 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33438 goto exit_error;
33440 case 3:
33441 if (!operand_equal_p (arg0, arg1, 0))
33443 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33444 d.op0 = force_reg (d.vmode, d.op0);
33445 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33446 d.op1 = force_reg (d.vmode, d.op1);
33447 break;
33450 /* The elements of PERM do not suggest that only the first operand
33451 is used, but both operands are identical. Allow easier matching
33452 of the permutation by folding the permutation into the single
33453 input vector. */
33455 unsigned i, nelt = d.nelt;
33456 for (i = 0; i < nelt; ++i)
33457 if (d.perm[i] >= nelt)
33458 d.perm[i] -= nelt;
33460 /* FALLTHRU */
33462 case 1:
33463 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33464 d.op0 = force_reg (d.vmode, d.op0);
33465 d.op1 = d.op0;
33466 break;
33468 case 2:
33469 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33470 d.op0 = force_reg (d.vmode, d.op0);
33471 d.op1 = d.op0;
33472 break;
33475 d.target = gen_reg_rtx (d.vmode);
33476 if (ix86_expand_vec_perm_builtin_1 (&d))
33477 return d.target;
33479 /* For compiler generated permutations, we should never get here, because
33480    the compiler should also be checking the ok hook. But since this is a
33481    builtin the user has access to, don't abort. */
33482 switch (d.nelt)
33484 case 2:
33485 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33486 break;
33487 case 4:
33488 sorry ("vector permutation (%d %d %d %d)",
33489 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33490 break;
33491 case 8:
33492 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33493 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33494 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33495 break;
33496 case 16:
33497 sorry ("vector permutation "
33498 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33499 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33500 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33501 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33502 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
33503 break;
33504 default:
33505 gcc_unreachable ();
33507 exit_error:
33508 return CONST0_RTX (d.vmode);
33511 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33513 static bool
33514 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
33516 struct expand_vec_perm_d d;
33517 int vec_mask;
33518 bool ret, one_vec;
33520 d.vmode = TYPE_MODE (vec_type);
33521 d.nelt = GET_MODE_NUNITS (d.vmode);
33522 d.testing_p = true;
33524 /* Given sufficient ISA support we can just return true here
33525 for selected vector modes. */
33526 if (GET_MODE_SIZE (d.vmode) == 16)
33528 /* All implementable with a single vpperm insn. */
33529 if (TARGET_XOP)
33530 return true;
33531 /* All implementable with 2 pshufb + 1 ior. */
33532 if (TARGET_SSSE3)
33533 return true;
33534 /* All implementable with shufpd or unpck[lh]pd. */
33535 if (d.nelt == 2)
33536 return true;
33539 vec_mask = extract_vec_perm_cst (&d, mask);
33541 /* This hook cannot be called in response to something that the
33542 user does (unlike the builtin expander) so we shouldn't ever see
33543 an error generated from the extract. */
33544 gcc_assert (vec_mask > 0 && vec_mask <= 3);
33545 one_vec = (vec_mask != 3);
33547 /* Implementable with shufps or pshufd. */
33548 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
33549 return true;
33551 /* Otherwise we have to go through the motions and see if we can
33552 figure out how to generate the requested permutation. */
33553 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
33554 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
33555 if (!one_vec)
33556 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
33558 start_sequence ();
33559 ret = ix86_expand_vec_perm_builtin_1 (&d);
33560 end_sequence ();
33562 return ret;
33565 void
33566 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
33568 struct expand_vec_perm_d d;
33569 unsigned i, nelt;
33571 d.target = targ;
33572 d.op0 = op0;
33573 d.op1 = op1;
33574 d.vmode = GET_MODE (targ);
33575 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
33576 d.testing_p = false;
33578 for (i = 0; i < nelt; ++i)
33579 d.perm[i] = i * 2 + odd;
33581 /* We'll either be able to implement the permutation directly... */
33582 if (expand_vec_perm_1 (&d))
33583 return;
33585 /* ... or we use the special-case patterns. */
33586 expand_vec_perm_even_odd_1 (&d, odd);
33589 /* This function returns the calling-ABI-specific va_list type node,
33590    i.e. the va_list type appropriate for FNDECL. */
33592 static tree
33593 ix86_fn_abi_va_list (tree fndecl)
33595 if (!TARGET_64BIT)
33596 return va_list_type_node;
33597 gcc_assert (fndecl != NULL_TREE);
33599 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
33600 return ms_va_list_type_node;
33601 else
33602 return sysv_va_list_type_node;
33605 /* Returns the canonical va_list type specified by TYPE. If there
33606 is no valid TYPE provided, it returns NULL_TREE. */
33608 static tree
33609 ix86_canonical_va_list_type (tree type)
33611 tree wtype, htype;
33613 /* Resolve references and pointers to va_list type. */
33614 if (TREE_CODE (type) == MEM_REF)
33615 type = TREE_TYPE (type);
33616 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
33617 type = TREE_TYPE (type);
33618 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
33619 type = TREE_TYPE (type);
33621 if (TARGET_64BIT)
33623 wtype = va_list_type_node;
33624 gcc_assert (wtype != NULL_TREE);
33625 htype = type;
33626 if (TREE_CODE (wtype) == ARRAY_TYPE)
33628 /* If va_list is an array type, the argument may have decayed
33629 to a pointer type, e.g. by being passed to another function.
33630 In that case, unwrap both types so that we can compare the
33631 underlying records. */
33632 if (TREE_CODE (htype) == ARRAY_TYPE
33633 || POINTER_TYPE_P (htype))
33635 wtype = TREE_TYPE (wtype);
33636 htype = TREE_TYPE (htype);
33639 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33640 return va_list_type_node;
33641 wtype = sysv_va_list_type_node;
33642 gcc_assert (wtype != NULL_TREE);
33643 htype = type;
33644 if (TREE_CODE (wtype) == ARRAY_TYPE)
33646 /* If va_list is an array type, the argument may have decayed
33647 to a pointer type, e.g. by being passed to another function.
33648 In that case, unwrap both types so that we can compare the
33649 underlying records. */
33650 if (TREE_CODE (htype) == ARRAY_TYPE
33651 || POINTER_TYPE_P (htype))
33653 wtype = TREE_TYPE (wtype);
33654 htype = TREE_TYPE (htype);
33657 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33658 return sysv_va_list_type_node;
33659 wtype = ms_va_list_type_node;
33660 gcc_assert (wtype != NULL_TREE);
33661 htype = type;
33662 if (TREE_CODE (wtype) == ARRAY_TYPE)
33664 /* If va_list is an array type, the argument may have decayed
33665 to a pointer type, e.g. by being passed to another function.
33666 In that case, unwrap both types so that we can compare the
33667 underlying records. */
33668 if (TREE_CODE (htype) == ARRAY_TYPE
33669 || POINTER_TYPE_P (htype))
33671 wtype = TREE_TYPE (wtype);
33672 htype = TREE_TYPE (htype);
33675 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33676 return ms_va_list_type_node;
33677 return NULL_TREE;
33679 return std_canonical_va_list_type (type);
33682 /* Iterate through the target-specific builtin types for va_list.
33683 IDX denotes the iterator, *PTREE is set to the result type of
33684 the va_list builtin, and *PNAME to its internal type.
33685 Returns zero if there is no element for this index, otherwise
33686 IDX should be increased upon the next call.
33687 Note, do not iterate a base builtin's name like __builtin_va_list.
33688 Used from c_common_nodes_and_builtins. */
33690 static int
33691 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
33693 if (TARGET_64BIT)
33695 switch (idx)
33697 default:
33698 break;
33700 case 0:
33701 *ptree = ms_va_list_type_node;
33702 *pname = "__builtin_ms_va_list";
33703 return 1;
33705 case 1:
33706 *ptree = sysv_va_list_type_node;
33707 *pname = "__builtin_sysv_va_list";
33708 return 1;
33712 return 0;
33715 #undef TARGET_SCHED_DISPATCH
33716 #define TARGET_SCHED_DISPATCH has_dispatch
33717 #undef TARGET_SCHED_DISPATCH_DO
33718 #define TARGET_SCHED_DISPATCH_DO do_dispatch
33720 /* The size of the dispatch window is the total number of bytes of
33721 object code allowed in a window. */
33722 #define DISPATCH_WINDOW_SIZE 16
33724 /* Number of dispatch windows considered for scheduling. */
33725 #define MAX_DISPATCH_WINDOWS 3
33727 /* Maximum number of instructions in a window. */
33728 #define MAX_INSN 4
33730 /* Maximum number of immediate operands in a window. */
33731 #define MAX_IMM 4
33733 /* Maximum number of immediate bits allowed in a window. */
33734 #define MAX_IMM_SIZE 128
33736 /* Maximum number of 32 bit immediates allowed in a window. */
33737 #define MAX_IMM_32 4
33739 /* Maximum number of 64 bit immediates allowed in a window. */
33740 #define MAX_IMM_64 2
33742 /* Maximum total of loads or prefetches allowed in a window. */
33743 #define MAX_LOAD 2
33745 /* Maximum total of stores allowed in a window. */
33746 #define MAX_STORE 1
33748 #undef BIG
33749 #define BIG 100
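/* Taken together, these limits mean a dispatch window may hold at most
   MAX_INSN (4) instructions in DISPATCH_WINDOW_SIZE (16) bytes, with at most
   MAX_LOAD (2) loads or prefetches, MAX_STORE (1) store, and MAX_IMM (4)
   immediate operands whose encodings total no more than MAX_IMM_SIZE (128)
   bits.  */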
33752 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
33753 enum dispatch_group {
33754 disp_no_group = 0,
33755 disp_load,
33756 disp_store,
33757 disp_load_store,
33758 disp_prefetch,
33759 disp_imm,
33760 disp_imm_32,
33761 disp_imm_64,
33762 disp_branch,
33763 disp_cmp,
33764 disp_jcc,
33765 disp_last
33768 /* Number of allowable groups in a dispatch window. It is an array
33769 indexed by dispatch_group enum. 100 is used as a big number,
33770 because the number of these kinds of operations has no effect on the
33771 dispatch window, but we need entries for them in the table for other
33772 reasons. */
33773 static unsigned int num_allowable_groups[disp_last] = {
33774 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
33777 char group_name[disp_last + 1][16] = {
33778 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
33779 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
33780 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
33783 /* Instruction path. */
33784 enum insn_path {
33785 no_path = 0,
33786 path_single, /* Single micro op. */
33787 path_double, /* Double micro op. */
33788 path_multi, /* Instructions with more than 2 micro ops. */
33789 last_path
33792 /* sched_insn_info describes one instruction scheduled into a dispatch
33793    window of the basic block, recording the insn together with its
33794    dispatch group, path, byte length and immediate size.
33796 Windows are allocated for each basic block and are linked
33797 together. */
33798 typedef struct sched_insn_info_s {
33799 rtx insn;
33800 enum dispatch_group group;
33801 enum insn_path path;
33802 int byte_len;
33803 int imm_bytes;
33804 } sched_insn_info;
33806 /* Linked list of dispatch windows. This is a two way list of
33807 dispatch windows of a basic block. It contains information about
33808 the number of uops in the window and the total number of
33809 instructions and of bytes in the object code for this dispatch
33810 window. */
33811 typedef struct dispatch_windows_s {
33812 int num_insn; /* Number of insn in the window. */
33813 int num_uops; /* Number of uops in the window. */
33814 int window_size; /* Number of bytes in the window. */
33815 int window_num; /* Window number, either 0 or 1. */
33816 int num_imm; /* Number of immediates in an insn. */
33817 int num_imm_32; /* Number of 32 bit immediates in an insn. */
33818 int num_imm_64; /* Number of 64 bit immediates in an insn. */
33819 int imm_size; /* Total immediates in the window. */
33820 int num_loads; /* Total memory loads in the window. */
33821 int num_stores; /* Total memory stores in the window. */
33822 int violation; /* Violation exists in window. */
33823 sched_insn_info *window; /* Pointer to the window. */
33824 struct dispatch_windows_s *next;
33825 struct dispatch_windows_s *prev;
33826 } dispatch_windows;
33828 /* Immediate values used in an insn. */
33829 typedef struct imm_info_s
33831 int imm;
33832 int imm32;
33833 int imm64;
33834 } imm_info;
33836 static dispatch_windows *dispatch_window_list;
33837 static dispatch_windows *dispatch_window_list1;
33839 /* Get dispatch group of insn. */
33841 static enum dispatch_group
33842 get_mem_group (rtx insn)
33844 enum attr_memory memory;
33846 if (INSN_CODE (insn) < 0)
33847 return disp_no_group;
33848 memory = get_attr_memory (insn);
33849 if (memory == MEMORY_STORE)
33850 return disp_store;
33852 if (memory == MEMORY_LOAD)
33853 return disp_load;
33855 if (memory == MEMORY_BOTH)
33856 return disp_load_store;
33858 return disp_no_group;
33861 /* Return true if insn is a compare instruction. */
33863 static bool
33864 is_cmp (rtx insn)
33866 enum attr_type type;
33868 type = get_attr_type (insn);
33869 return (type == TYPE_TEST
33870 || type == TYPE_ICMP
33871 || type == TYPE_FCMP
33872 || GET_CODE (PATTERN (insn)) == COMPARE);
33875 /* Return true if a dispatch violation encountered. */
33877 static bool
33878 dispatch_violation (void)
33880 if (dispatch_window_list->next)
33881 return dispatch_window_list->next->violation;
33882 return dispatch_window_list->violation;
33885 /* Return true if insn is a branch instruction. */
33887 static bool
33888 is_branch (rtx insn)
33890 return (CALL_P (insn) || JUMP_P (insn));
33893 /* Return true if insn is a prefetch instruction. */
33895 static bool
33896 is_prefetch (rtx insn)
33898 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
33901 /* This function initializes a dispatch window and the list container holding a
33902 pointer to the window. */
33904 static void
33905 init_window (int window_num)
33907 int i;
33908 dispatch_windows *new_list;
33910 if (window_num == 0)
33911 new_list = dispatch_window_list;
33912 else
33913 new_list = dispatch_window_list1;
33915 new_list->num_insn = 0;
33916 new_list->num_uops = 0;
33917 new_list->window_size = 0;
33918 new_list->next = NULL;
33919 new_list->prev = NULL;
33920 new_list->window_num = window_num;
33921 new_list->num_imm = 0;
33922 new_list->num_imm_32 = 0;
33923 new_list->num_imm_64 = 0;
33924 new_list->imm_size = 0;
33925 new_list->num_loads = 0;
33926 new_list->num_stores = 0;
33927 new_list->violation = false;
33929 for (i = 0; i < MAX_INSN; i++)
33931 new_list->window[i].insn = NULL;
33932 new_list->window[i].group = disp_no_group;
33933 new_list->window[i].path = no_path;
33934 new_list->window[i].byte_len = 0;
33935 new_list->window[i].imm_bytes = 0;
33937 return;
33940 /* This function allocates and initializes a dispatch window and the
33941 list container holding a pointer to the window. */
33943 static dispatch_windows *
33944 allocate_window (void)
33946 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
33947 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
33949 return new_list;
33952 /* This routine initializes the dispatch scheduling information. It
33953 initiates building dispatch scheduler tables and constructs the
33954 first dispatch window. */
33956 static void
33957 init_dispatch_sched (void)
33959 /* Allocate a dispatch list and a window. */
33960 dispatch_window_list = allocate_window ();
33961 dispatch_window_list1 = allocate_window ();
33962 init_window (0);
33963 init_window (1);
33966 /* This function returns true if a branch is detected. End of a basic block
33967 does not have to be a branch, but here we assume only branches end a
33968 window. */
33970 static bool
33971 is_end_basic_block (enum dispatch_group group)
33973 return group == disp_branch;
33976 /* This function is called when the end of a window processing is reached. */
33978 static void
33979 process_end_window (void)
33981 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
33982 if (dispatch_window_list->next)
33984 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
33985 gcc_assert (dispatch_window_list->window_size
33986 + dispatch_window_list1->window_size <= 48);
33987 init_window (1);
33989 init_window (0);
33992 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
33993 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
33994 for 48 bytes of instructions. Note that these windows are not dispatch
33995 windows of size DISPATCH_WINDOW_SIZE. */
33997 static dispatch_windows *
33998 allocate_next_window (int window_num)
34000 if (window_num == 0)
34002 if (dispatch_window_list->next)
34003 init_window (1);
34004 init_window (0);
34005 return dispatch_window_list;
34008 dispatch_window_list->next = dispatch_window_list1;
34009 dispatch_window_list1->prev = dispatch_window_list;
34011 return dispatch_window_list1;
34014 /* Increment the number of immediate operands of an instruction. */
34016 static int
34017 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
34019 if (*in_rtx == 0)
34020 return 0;
34022 switch ( GET_CODE (*in_rtx))
34024 case CONST:
34025 case SYMBOL_REF:
34026 case CONST_INT:
34027 (imm_values->imm)++;
34028 if (x86_64_immediate_operand (*in_rtx, SImode))
34029 (imm_values->imm32)++;
34030 else
34031 (imm_values->imm64)++;
34032 break;
34034 case CONST_DOUBLE:
34035 (imm_values->imm)++;
34036 (imm_values->imm64)++;
34037 break;
34039 case CODE_LABEL:
34040 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
34042 (imm_values->imm)++;
34043 (imm_values->imm32)++;
34045 break;
34047 default:
34048 break;
34051 return 0;
34054 /* Compute number of immediate operands of an instruction. */
34056 static void
34057 find_constant (rtx in_rtx, imm_info *imm_values)
34059 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
34060 (rtx_function) find_constant_1, (void *) imm_values);
34063 /* Return total size of immediate operands of an instruction along with number
34064 of corresponding immediate-operands. It initializes its parameters to zero
34065 before calling FIND_CONSTANT.
34066 INSN is the input instruction. IMM is the total of immediates.
34067 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
34068 bit immediates. */
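/* For example, an insn carrying one 32-bit and one 64-bit immediate sets
   *IMM = 2, *IMM32 = 1 and *IMM64 = 1, and the function returns
   1 * 4 + 1 * 8 = 12 bytes.  */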
34070 static int
34071 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
34073 imm_info imm_values = {0, 0, 0};
34075 find_constant (insn, &imm_values);
34076 *imm = imm_values.imm;
34077 *imm32 = imm_values.imm32;
34078 *imm64 = imm_values.imm64;
34079 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
34082 /* This function indicates whether an instruction has any immediate
34083    operands. */
34085 static bool
34086 has_immediate (rtx insn)
34088 int num_imm_operand;
34089 int num_imm32_operand;
34090 int num_imm64_operand;
34092 if (insn)
34093 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34094 &num_imm64_operand);
34095 return false;
34098 /* Return single or double path for instructions. */
34100 static enum insn_path
34101 get_insn_path (rtx insn)
34103 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
34105 if ((int)path == 0)
34106 return path_single;
34108 if ((int)path == 1)
34109 return path_double;
34111 return path_multi;
34114 /* Return insn dispatch group. */
34116 static enum dispatch_group
34117 get_insn_group (rtx insn)
34119 enum dispatch_group group = get_mem_group (insn);
34120 if (group)
34121 return group;
34123 if (is_branch (insn))
34124 return disp_branch;
34126 if (is_cmp (insn))
34127 return disp_cmp;
34129 if (has_immediate (insn))
34130 return disp_imm;
34132 if (is_prefetch (insn))
34133 return disp_prefetch;
34135 return disp_no_group;
34138 /* Count number of GROUP restricted instructions in a dispatch
34139 window WINDOW_LIST. */
34141 static int
34142 count_num_restricted (rtx insn, dispatch_windows *window_list)
34144 enum dispatch_group group = get_insn_group (insn);
34145 int imm_size;
34146 int num_imm_operand;
34147 int num_imm32_operand;
34148 int num_imm64_operand;
34150 if (group == disp_no_group)
34151 return 0;
34153 if (group == disp_imm)
34155 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34156 &num_imm64_operand);
34157 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
34158 || num_imm_operand + window_list->num_imm > MAX_IMM
34159 || (num_imm32_operand > 0
34160 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
34161 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
34162 || (num_imm64_operand > 0
34163 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
34164 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
34165 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
34166 && num_imm64_operand > 0
34167 && ((window_list->num_imm_64 > 0
34168 && window_list->num_insn >= 2)
34169 || window_list->num_insn >= 3)))
34170 return BIG;
34172 return 1;
34175 if ((group == disp_load_store
34176 && (window_list->num_loads >= MAX_LOAD
34177 || window_list->num_stores >= MAX_STORE))
34178 || ((group == disp_load
34179 || group == disp_prefetch)
34180 && window_list->num_loads >= MAX_LOAD)
34181 || (group == disp_store
34182 && window_list->num_stores >= MAX_STORE))
34183 return BIG;
34185 return 1;
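/* Sketch of how the limits above interact (the MAX_* values themselves are
   defined earlier in this file and are not repeated here): a disp_imm insn
   returns BIG when its immediates would push the window past MAX_IMM
   operands or MAX_IMM_SIZE bytes, or when the mix of 32 bit and 64 bit
   immediates (each 64 bit immediate counting as two 32 bit slots) would
   exceed MAX_IMM_32 or MAX_IMM_64; load, store and prefetch insns are
   limited by MAX_LOAD and MAX_STORE in the same way.  Otherwise the insn
   counts as a single restricted slot.  */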
34188 /* Return true if INSN satisfies the dispatch rules of the last
34189 scheduled window. */
34191 static bool
34192 fits_dispatch_window (rtx insn)
34194 dispatch_windows *window_list = dispatch_window_list;
34195 dispatch_windows *window_list_next = dispatch_window_list->next;
34196 unsigned int num_restrict;
34197 enum dispatch_group group = get_insn_group (insn);
34198 enum insn_path path = get_insn_path (insn);
34199 int sum;
34201 /* Make disp_cmp and disp_jcc get scheduled as late as possible.  These
34202 instructions should be given the lowest priority in the
34203 scheduling process in the Haifa scheduler to make sure they will be
34204 scheduled in the same dispatch window as the reference to them. */
34205 if (group == disp_jcc || group == disp_cmp)
34206 return false;
34208 /* Check nonrestricted. */
34209 if (group == disp_no_group || group == disp_branch)
34210 return true;
34212 /* Get last dispatch window. */
34213 if (window_list_next)
34214 window_list = window_list_next;
34216 if (window_list->window_num == 1)
34218 sum = window_list->prev->window_size + window_list->window_size;
34220 if (sum == 32
34221 || (min_insn_size (insn) + sum) >= 48)
34222 /* Window 1 is full. Go for next window. */
34223 return true;
34226 num_restrict = count_num_restricted (insn, window_list);
34228 if (num_restrict > num_allowable_groups[group])
34229 return false;
34231 /* See if it fits in the first window. */
34232 if (window_list->window_num == 0)
34234 /* The first window should have only single and double path
34235 uops. */
34236 if (path == path_double
34237 && (window_list->num_uops + 2) > MAX_INSN)
34238 return false;
34239 else if (path != path_single)
34240 return false;
34242 return true;
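/* Note on the early exits above: compare and conditional-jump insns are
   reported as not fitting so that the Haifa scheduler keeps them at the
   lowest priority and they tend to land in the same window as the insn
   that uses their result; window 1 is considered full once the two windows
   together hold 32 bytes, or would reach 48 bytes with the new insn,
   mirroring the checks in add_to_dispatch_window.  */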
34245 /* Add an instruction INSN with NUM_UOPS micro-operations to the
34246 dispatch window WINDOW_LIST. */
34248 static void
34249 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
34251 int byte_len = min_insn_size (insn);
34252 int num_insn = window_list->num_insn;
34253 int imm_size;
34254 sched_insn_info *window = window_list->window;
34255 enum dispatch_group group = get_insn_group (insn);
34256 enum insn_path path = get_insn_path (insn);
34257 int num_imm_operand;
34258 int num_imm32_operand;
34259 int num_imm64_operand;
34261 if (!window_list->violation && group != disp_cmp
34262 && !fits_dispatch_window (insn))
34263 window_list->violation = true;
34265 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34266 &num_imm64_operand);
34268 /* Initialize window with new instruction. */
34269 window[num_insn].insn = insn;
34270 window[num_insn].byte_len = byte_len;
34271 window[num_insn].group = group;
34272 window[num_insn].path = path;
34273 window[num_insn].imm_bytes = imm_size;
34275 window_list->window_size += byte_len;
34276 window_list->num_insn = num_insn + 1;
34277 window_list->num_uops = window_list->num_uops + num_uops;
34278 window_list->imm_size += imm_size;
34279 window_list->num_imm += num_imm_operand;
34280 window_list->num_imm_32 += num_imm32_operand;
34281 window_list->num_imm_64 += num_imm64_operand;
34283 if (group == disp_store)
34284 window_list->num_stores += 1;
34285 else if (group == disp_load
34286 || group == disp_prefetch)
34287 window_list->num_loads += 1;
34288 else if (group == disp_load_store)
34290 window_list->num_stores += 1;
34291 window_list->num_loads += 1;
34295 /* Add a scheduled instruction, INSN, to the current dispatch window.
34296 If the total bytes of instructions or the number of instructions in
34297 the window exceeds the allowable limit, allocate a new window. */
34299 static void
34300 add_to_dispatch_window (rtx insn)
34302 int byte_len;
34303 dispatch_windows *window_list;
34304 dispatch_windows *next_list;
34305 dispatch_windows *window0_list;
34306 enum insn_path path;
34307 enum dispatch_group insn_group;
34308 bool insn_fits;
34309 int num_insn;
34310 int num_uops;
34311 int window_num;
34312 int insn_num_uops;
34313 int sum;
34315 if (INSN_CODE (insn) < 0)
34316 return;
34318 byte_len = min_insn_size (insn);
34319 window_list = dispatch_window_list;
34320 next_list = window_list->next;
34321 path = get_insn_path (insn);
34322 insn_group = get_insn_group (insn);
34324 /* Get the last dispatch window. */
34325 if (next_list)
34326 window_list = dispatch_window_list->next;
34328 if (path == path_single)
34329 insn_num_uops = 1;
34330 else if (path == path_double)
34331 insn_num_uops = 2;
34332 else
34333 insn_num_uops = (int) path;
34335 /* If the current window is full, get a new window.
34336 Window number zero is full if MAX_INSN uops are scheduled in it.
34337 Window number one is full if the sum of window zero's bytes and
34338 window one's bytes is 32, if adding the new instruction's bytes
34339 to that total would make it 48 or more, or if it already has
34340 MAX_INSN instructions in it. */
34341 num_insn = window_list->num_insn;
34342 num_uops = window_list->num_uops;
34343 window_num = window_list->window_num;
34344 insn_fits = fits_dispatch_window (insn);
34346 if (num_insn >= MAX_INSN
34347 || num_uops + insn_num_uops > MAX_INSN
34348 || !(insn_fits))
34350 window_num = ~window_num & 1;
34351 window_list = allocate_next_window (window_num);
34354 if (window_num == 0)
34356 add_insn_window (insn, window_list, insn_num_uops);
34357 if (window_list->num_insn >= MAX_INSN
34358 && insn_group == disp_branch)
34360 process_end_window ();
34361 return;
34364 else if (window_num == 1)
34366 window0_list = window_list->prev;
34367 sum = window0_list->window_size + window_list->window_size;
34368 if (sum == 32
34369 || (byte_len + sum) >= 48)
34371 process_end_window ();
34372 window_list = dispatch_window_list;
34375 add_insn_window (insn, window_list, insn_num_uops);
34377 else
34378 gcc_unreachable ();
34380 if (is_end_basic_block (insn_group))
34382 /* The end of the basic block has been reached; do end-of-basic-block processing. */
34383 process_end_window ();
34384 return;
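/* Rough walk-through of the flow above: insns are appended to the current
   window until it already holds MAX_INSN insns, its uop count would exceed
   MAX_INSN, or the insn does not fit, at which point scheduling flips to
   the other window (window_num = ~window_num & 1).  Both windows are
   flushed through process_end_window when window 0 fills up ending in a
   branch, when the two windows together reach 32 bytes or would reach 48
   bytes with the incoming insn, or when an insn group that ends the basic
   block is seen.  */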
34388 /* Print the dispatch window, WINDOW_NUM, to FILE. */
34390 DEBUG_FUNCTION static void
34391 debug_dispatch_window_file (FILE *file, int window_num)
34393 dispatch_windows *list;
34394 int i;
34396 if (window_num == 0)
34397 list = dispatch_window_list;
34398 else
34399 list = dispatch_window_list1;
34401 fprintf (file, "Window #%d:\n", list->window_num);
34402 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
34403 list->num_insn, list->num_uops, list->window_size);
34404 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34405 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
34407 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
34408 list->num_stores);
34409 fprintf (file, " insn info:\n");
34411 for (i = 0; i < MAX_INSN; i++)
34413 if (!list->window[i].insn)
34414 break;
34415 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
34416 i, group_name[list->window[i].group],
34417 i, (void *)list->window[i].insn,
34418 i, list->window[i].path,
34419 i, list->window[i].byte_len,
34420 i, list->window[i].imm_bytes);
34424 /* Print dispatch window WINDOW_NUM to stdout. */
34426 DEBUG_FUNCTION void
34427 debug_dispatch_window (int window_num)
34429 debug_dispatch_window_file (stdout, window_num);
34432 /* Print INSN dispatch information to FILE. */
34434 DEBUG_FUNCTION static void
34435 debug_insn_dispatch_info_file (FILE *file, rtx insn)
34437 int byte_len;
34438 enum insn_path path;
34439 enum dispatch_group group;
34440 int imm_size;
34441 int num_imm_operand;
34442 int num_imm32_operand;
34443 int num_imm64_operand;
34445 if (INSN_CODE (insn) < 0)
34446 return;
34448 byte_len = min_insn_size (insn);
34449 path = get_insn_path (insn);
34450 group = get_insn_group (insn);
34451 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34452 &num_imm64_operand);
34454 fprintf (file, " insn info:\n");
34455 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
34456 group_name[group], path, byte_len);
34457 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34458 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
34461 /* Print to STDOUT the status of the ready list with respect to
34462 dispatch windows. */
34464 DEBUG_FUNCTION void
34465 debug_ready_dispatch (void)
34467 int i;
34468 int no_ready = number_in_ready ();
34470 fprintf (stdout, "Number of ready: %d\n", no_ready);
34472 for (i = 0; i < no_ready; i++)
34473 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
34476 /* This routine is the driver of the dispatch scheduler. */
34478 static void
34479 do_dispatch (rtx insn, int mode)
34481 if (mode == DISPATCH_INIT)
34482 init_dispatch_sched ();
34483 else if (mode == ADD_TO_DISPATCH_WINDOW)
34484 add_to_dispatch_window (insn);
34487 /* Return TRUE if Dispatch Scheduling is supported. */
34489 static bool
34490 has_dispatch (rtx insn, int action)
34492 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
34493 switch (action)
34495 default:
34496 return false;
34498 case IS_DISPATCH_ON:
34499 return true;
34502 case IS_CMP:
34503 return is_cmp (insn);
34505 case DISPATCH_VIOLATION:
34506 return dispatch_violation ();
34508 case FITS_DISPATCH_WINDOW:
34509 return fits_dispatch_window (insn);
34512 return false;
34515 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
34516 place emms and femms instructions. */
34518 static enum machine_mode
34519 ix86_preferred_simd_mode (enum machine_mode mode)
34521 /* Disable double precision vectorizer if needed. */
34522 if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
34523 return word_mode;
34525 if (!TARGET_AVX && !TARGET_SSE)
34526 return word_mode;
34528 switch (mode)
34530 case SFmode:
34531 return TARGET_AVX ? V8SFmode : V4SFmode;
34532 case DFmode:
34533 return TARGET_AVX ? V4DFmode : V2DFmode;
34534 case DImode:
34535 return V2DImode;
34536 case SImode:
34537 return V4SImode;
34538 case HImode:
34539 return V8HImode;
34540 case QImode:
34541 return V16QImode;
34543 default:;
34546 return word_mode;
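/* As the switch above shows, only the floating point modes are widened to
   256 bit vectors (V8SFmode/V4DFmode) when AVX is enabled; the integer
   modes keep their 128 bit SSE mappings, since 256 bit integer vector
   operations are not used here.  */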
34549 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
34550 vectors. */
34552 static unsigned int
34553 ix86_autovectorize_vector_sizes (void)
34555 return TARGET_AVX ? 32 | 16 : 0;
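/* The value returned above is a bitmask of candidate vector sizes in
   bytes: 32 | 16 lets the vectorizer try 32 byte (256 bit) vectors and
   fall back to 16 byte (128 bit) vectors, while 0 means only the mode
   chosen by ix86_preferred_simd_mode is considered.  */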
34558 /* Initialize the GCC target structure. */
34559 #undef TARGET_RETURN_IN_MEMORY
34560 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
34562 #undef TARGET_LEGITIMIZE_ADDRESS
34563 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
34565 #undef TARGET_ATTRIBUTE_TABLE
34566 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
34567 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34568 # undef TARGET_MERGE_DECL_ATTRIBUTES
34569 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
34570 #endif
34572 #undef TARGET_COMP_TYPE_ATTRIBUTES
34573 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
34575 #undef TARGET_INIT_BUILTINS
34576 #define TARGET_INIT_BUILTINS ix86_init_builtins
34577 #undef TARGET_BUILTIN_DECL
34578 #define TARGET_BUILTIN_DECL ix86_builtin_decl
34579 #undef TARGET_EXPAND_BUILTIN
34580 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
34582 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
34583 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
34584 ix86_builtin_vectorized_function
34586 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
34587 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
34589 #undef TARGET_BUILTIN_RECIPROCAL
34590 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
34592 #undef TARGET_ASM_FUNCTION_EPILOGUE
34593 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
34595 #undef TARGET_ENCODE_SECTION_INFO
34596 #ifndef SUBTARGET_ENCODE_SECTION_INFO
34597 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
34598 #else
34599 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
34600 #endif
34602 #undef TARGET_ASM_OPEN_PAREN
34603 #define TARGET_ASM_OPEN_PAREN ""
34604 #undef TARGET_ASM_CLOSE_PAREN
34605 #define TARGET_ASM_CLOSE_PAREN ""
34607 #undef TARGET_ASM_BYTE_OP
34608 #define TARGET_ASM_BYTE_OP ASM_BYTE
34610 #undef TARGET_ASM_ALIGNED_HI_OP
34611 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
34612 #undef TARGET_ASM_ALIGNED_SI_OP
34613 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
34614 #ifdef ASM_QUAD
34615 #undef TARGET_ASM_ALIGNED_DI_OP
34616 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
34617 #endif
34619 #undef TARGET_PROFILE_BEFORE_PROLOGUE
34620 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
34622 #undef TARGET_ASM_UNALIGNED_HI_OP
34623 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
34624 #undef TARGET_ASM_UNALIGNED_SI_OP
34625 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
34626 #undef TARGET_ASM_UNALIGNED_DI_OP
34627 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
34629 #undef TARGET_PRINT_OPERAND
34630 #define TARGET_PRINT_OPERAND ix86_print_operand
34631 #undef TARGET_PRINT_OPERAND_ADDRESS
34632 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
34633 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
34634 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
34635 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
34636 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
34638 #undef TARGET_SCHED_INIT_GLOBAL
34639 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
34640 #undef TARGET_SCHED_ADJUST_COST
34641 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
34642 #undef TARGET_SCHED_ISSUE_RATE
34643 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
34644 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
34645 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
34646 ia32_multipass_dfa_lookahead
34648 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
34649 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
34651 #ifdef HAVE_AS_TLS
34652 #undef TARGET_HAVE_TLS
34653 #define TARGET_HAVE_TLS true
34654 #endif
34655 #undef TARGET_CANNOT_FORCE_CONST_MEM
34656 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
34657 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
34658 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
34660 #undef TARGET_DELEGITIMIZE_ADDRESS
34661 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
34663 #undef TARGET_MS_BITFIELD_LAYOUT_P
34664 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
34666 #if TARGET_MACHO
34667 #undef TARGET_BINDS_LOCAL_P
34668 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
34669 #endif
34670 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34671 #undef TARGET_BINDS_LOCAL_P
34672 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
34673 #endif
34675 #undef TARGET_ASM_OUTPUT_MI_THUNK
34676 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
34677 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
34678 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
34680 #undef TARGET_ASM_FILE_START
34681 #define TARGET_ASM_FILE_START x86_file_start
34683 #undef TARGET_DEFAULT_TARGET_FLAGS
34684 #define TARGET_DEFAULT_TARGET_FLAGS \
34685 (TARGET_DEFAULT \
34686 | TARGET_SUBTARGET_DEFAULT \
34687 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
34689 #undef TARGET_HANDLE_OPTION
34690 #define TARGET_HANDLE_OPTION ix86_handle_option
34692 #undef TARGET_OPTION_OVERRIDE
34693 #define TARGET_OPTION_OVERRIDE ix86_option_override
34694 #undef TARGET_OPTION_OPTIMIZATION_TABLE
34695 #define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
34696 #undef TARGET_OPTION_INIT_STRUCT
34697 #define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
34699 #undef TARGET_REGISTER_MOVE_COST
34700 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
34701 #undef TARGET_MEMORY_MOVE_COST
34702 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
34703 #undef TARGET_RTX_COSTS
34704 #define TARGET_RTX_COSTS ix86_rtx_costs
34705 #undef TARGET_ADDRESS_COST
34706 #define TARGET_ADDRESS_COST ix86_address_cost
34708 #undef TARGET_FIXED_CONDITION_CODE_REGS
34709 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
34710 #undef TARGET_CC_MODES_COMPATIBLE
34711 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
34713 #undef TARGET_MACHINE_DEPENDENT_REORG
34714 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
34716 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
34717 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
34719 #undef TARGET_BUILD_BUILTIN_VA_LIST
34720 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
34722 #undef TARGET_ENUM_VA_LIST_P
34723 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
34725 #undef TARGET_FN_ABI_VA_LIST
34726 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
34728 #undef TARGET_CANONICAL_VA_LIST_TYPE
34729 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
34731 #undef TARGET_EXPAND_BUILTIN_VA_START
34732 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
34734 #undef TARGET_MD_ASM_CLOBBERS
34735 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
34737 #undef TARGET_PROMOTE_PROTOTYPES
34738 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
34739 #undef TARGET_STRUCT_VALUE_RTX
34740 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
34741 #undef TARGET_SETUP_INCOMING_VARARGS
34742 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
34743 #undef TARGET_MUST_PASS_IN_STACK
34744 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
34745 #undef TARGET_FUNCTION_ARG_ADVANCE
34746 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
34747 #undef TARGET_FUNCTION_ARG
34748 #define TARGET_FUNCTION_ARG ix86_function_arg
34749 #undef TARGET_FUNCTION_ARG_BOUNDARY
34750 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
34751 #undef TARGET_PASS_BY_REFERENCE
34752 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
34753 #undef TARGET_INTERNAL_ARG_POINTER
34754 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
34755 #undef TARGET_UPDATE_STACK_BOUNDARY
34756 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
34757 #undef TARGET_GET_DRAP_RTX
34758 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
34759 #undef TARGET_STRICT_ARGUMENT_NAMING
34760 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
34761 #undef TARGET_STATIC_CHAIN
34762 #define TARGET_STATIC_CHAIN ix86_static_chain
34763 #undef TARGET_TRAMPOLINE_INIT
34764 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
34765 #undef TARGET_RETURN_POPS_ARGS
34766 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
34768 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
34769 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
34771 #undef TARGET_SCALAR_MODE_SUPPORTED_P
34772 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
34774 #undef TARGET_VECTOR_MODE_SUPPORTED_P
34775 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
34777 #undef TARGET_C_MODE_FOR_SUFFIX
34778 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
34780 #ifdef HAVE_AS_TLS
34781 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
34782 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
34783 #endif
34785 #ifdef SUBTARGET_INSERT_ATTRIBUTES
34786 #undef TARGET_INSERT_ATTRIBUTES
34787 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
34788 #endif
34790 #undef TARGET_MANGLE_TYPE
34791 #define TARGET_MANGLE_TYPE ix86_mangle_type
34793 #undef TARGET_STACK_PROTECT_FAIL
34794 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
34796 #undef TARGET_SUPPORTS_SPLIT_STACK
34797 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
34799 #undef TARGET_FUNCTION_VALUE
34800 #define TARGET_FUNCTION_VALUE ix86_function_value
34802 #undef TARGET_FUNCTION_VALUE_REGNO_P
34803 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
34805 #undef TARGET_SECONDARY_RELOAD
34806 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
34808 #undef TARGET_PREFERRED_RELOAD_CLASS
34809 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
34810 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
34811 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
34812 #undef TARGET_CLASS_LIKELY_SPILLED_P
34813 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
34815 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
34816 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
34817 ix86_builtin_vectorization_cost
34818 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
34819 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
34820 ix86_vectorize_builtin_vec_perm
34821 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
34822 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
34823 ix86_vectorize_builtin_vec_perm_ok
34824 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
34825 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
34826 ix86_preferred_simd_mode
34827 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
34828 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
34829 ix86_autovectorize_vector_sizes
34831 #undef TARGET_SET_CURRENT_FUNCTION
34832 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
34834 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
34835 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
34837 #undef TARGET_OPTION_SAVE
34838 #define TARGET_OPTION_SAVE ix86_function_specific_save
34840 #undef TARGET_OPTION_RESTORE
34841 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
34843 #undef TARGET_OPTION_PRINT
34844 #define TARGET_OPTION_PRINT ix86_function_specific_print
34846 #undef TARGET_CAN_INLINE_P
34847 #define TARGET_CAN_INLINE_P ix86_can_inline_p
34849 #undef TARGET_EXPAND_TO_RTL_HOOK
34850 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
34852 #undef TARGET_LEGITIMATE_ADDRESS_P
34853 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
34855 #undef TARGET_IRA_COVER_CLASSES
34856 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
34858 #undef TARGET_FRAME_POINTER_REQUIRED
34859 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
34861 #undef TARGET_CAN_ELIMINATE
34862 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
34864 #undef TARGET_EXTRA_LIVE_ON_ENTRY
34865 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
34867 #undef TARGET_ASM_CODE_END
34868 #define TARGET_ASM_CODE_END ix86_code_end
34870 #undef TARGET_CONDITIONAL_REGISTER_USAGE
34871 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
34873 struct gcc_target targetm = TARGET_INITIALIZER;
34875 #include "gt-i386.h"