/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
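
/* Usage sketch (illustrative): the pass below keeps this per-block record
   in the generic `aux' field of each basic_block.  The records are
   allocated in bulk with alloc_aux_for_blocks (sizeof (struct
   block_info_def)) and then read back through the macro, e.g.
   BLOCK_INFO (bb)->state or BLOCK_INFO (bb)->processed.  */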
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
          && REG_P (SET_SRC (set))
          && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
        = (enum upper_128bits_state *) data;
      *state = used;
    }
}
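
/* Usage sketch (illustrative; mirrors the call made in the scanner below):
   the callback is handed to note_stores, which invokes it once for every
   store expression in an insn pattern, passing the stored-to destination
   and the enclosing SET:

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);

   Any store that touches a 256bit AVX register flips STATE to `used'.  */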
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
                             enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
                 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
                 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
             bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
        {
          if (!vzeroupper_insn)
            continue;

          if (PREV_INSN (insn) != vzeroupper_insn)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Move vzeroupper after:\n");
                  print_rtl_single (dump_file, PREV_INSN (insn));
                  fprintf (dump_file, "before:\n");
                  print_rtl_single (dump_file, insn);
                }
              reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
                                  PREV_INSN (insn));
            }
          vzeroupper_insn = NULL_RTX;
          continue;
        }

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
          && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
          if (dump_file)
            {
              /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
        }
      else
        {
          /* Check insn for vzeroall intrinsic.  */
          if (GET_CODE (pat) == PARALLEL
              && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
              && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
              state = unused;
              unchanged = false;

              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
          else if (state != used)
            {
              note_stores (pat, check_avx256_stores, &state);
              if (state == used)
                unchanged = false;
            }
          continue;
        }

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
        {
          /* Since the upper 128bits are cleared, callee must not pass
             256bit AVX register.  We only need to check if callee
             returns 256bit AVX register.  */
          if (avx256 == callee_return_avx256)
            {
              state = used;
              unchanged = false;
            }

          /* Remove unnecessary vzeroupper since upper 128bits are
             cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else
        {
          /* Set state to UNUSED if callee doesn't return 256bit AVX
             register.  */
          if (avx256 != callee_return_pass_avx256)
            state = unused;

          if (avx256 == callee_return_pass_avx256
              || avx256 == callee_pass_avx256)
            {
              /* Must remove vzeroupper since callee passes in 256bit
                 AVX register.  */
              if (dump_file)
                {
                  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
                  print_rtl_single (dump_file, insn);
                }
              delete_insn (insn);
            }
          else
            {
              vzeroupper_insn = insn;
              unchanged = false;
            }
        }
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
             bb->index, unchanged ? "unchanged" : "changed",
             state);
}
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
             block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      switch (BLOCK_INFO (e->src)->state)
        {
        case unknown:
          if (!unknown_is_unused)
            seen_unknown = true;
          break;
        case unused:
          break;
        case used:
          state = used;
          break;
        }
    }

  if (seen_unknown)
    state = unknown;

  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
        cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
                                   cfun->machine->caller_pass_avx256_p
                                   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
        move_or_delete_vzeroupper_1 (bb, false);
        fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
        {
          bb = (basic_block) fibheap_extract_min (worklist);
          RESET_BIT (in_worklist, bb->index);
          gcc_assert (!TEST_BIT (visited, bb->index));
          if (!TEST_BIT (visited, bb->index))
            {
              SET_BIT (visited, bb->index);

              if (move_or_delete_vzeroupper_1 (bb, false))
                FOR_EACH_EDGE (e, ei, bb->succs)
                  {
                    if (e->dest == EXIT_BLOCK_PTR
                        || BLOCK_INFO (e->dest)->processed)
                      continue;

                    if (TEST_BIT (visited, e->dest->index))
                      {
                        if (!TEST_BIT (in_pending, e->dest->index))
                          {
                            /* Send E->DEST to next round.  */
                            SET_BIT (in_pending, e->dest->index);
                            fibheap_insert (pending,
                                            bb_order[e->dest->index],
                                            e->dest);
                          }
                      }
                    else if (!TEST_BIT (in_worklist, e->dest->index))
                      {
                        /* Add E->DEST to current round.  */
                        SET_BIT (in_worklist, e->dest->index);
                        fibheap_insert (worklist, bb_order[e->dest->index],
                                        e->dest);
                      }
                  }
            }
        }

      if (!cfun->machine->rescan_vzeroupper_p)
        break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
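
/* Illustrative use (a sketch; mult_init and divide are the array fields of
   struct processor_costs these indices select into):

     ix86_cost->mult_init[MODE_INDEX (SImode)]
     ix86_cost->divide[MODE_INDEX (DImode)]

   Index 4 ("other") covers any integer mode not listed above.  */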
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
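
/* Worked example: with COSTS_N_INSNS (N) == (N) * 4, a cost of
   COSTS_N_INSNS (1) is 4 units, and COSTS_N_BYTES (2) == 4 units as well,
   so a 2-byte addition weighs the same as one "average" instruction.  When
   tuning for size the table below therefore charges roughly 2 units per
   byte of encoding rather than per cycle of latency.  */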
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
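
/* How to read the stringop entries in the tables below (a sketch, assuming
   the stringop_algs layout from i386.h): each descriptor is
   {unknown_size_alg, {{max, alg}, ...}}, where the {max, alg} pairs are
   tried in order and a max of -1 terminates the list.  For example,

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   means blocks of up to 256 bytes use a "rep movsl"-style sequence and
   anything larger (or of unknown size) goes through a library call.  Each
   cost table carries two such descriptors for memcpy and two for memset
   (roughly, one per 32-bit/64-bit flavor).  */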
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
515 COSTS_N_BYTES (2), /* cost of an add instruction */
516 COSTS_N_BYTES (3), /* cost of a lea instruction */
517 COSTS_N_BYTES (2), /* variable shift costs */
518 COSTS_N_BYTES (3), /* constant shift costs */
519 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
520 COSTS_N_BYTES (3), /* HI */
521 COSTS_N_BYTES (3), /* SI */
522 COSTS_N_BYTES (3), /* DI */
523 COSTS_N_BYTES (5)}, /* other */
524 0, /* cost of multiply per each bit set */
525 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
526 COSTS_N_BYTES (3), /* HI */
527 COSTS_N_BYTES (3), /* SI */
528 COSTS_N_BYTES (3), /* DI */
529 COSTS_N_BYTES (5)}, /* other */
530 COSTS_N_BYTES (3), /* cost of movsx */
531 COSTS_N_BYTES (3), /* cost of movzx */
532 0, /* "large" insn */
534 2, /* cost for loading QImode using movzbl */
535 {2, 2, 2}, /* cost of loading integer registers
536 in QImode, HImode and SImode.
537 Relative to reg-reg move (2). */
538 {2, 2, 2}, /* cost of storing integer registers */
539 2, /* cost of reg,reg fld/fst */
540 {2, 2, 2}, /* cost of loading fp registers
541 in SFmode, DFmode and XFmode */
542 {2, 2, 2}, /* cost of storing fp registers
543 in SFmode, DFmode and XFmode */
544 3, /* cost of moving MMX register */
545 {3, 3}, /* cost of loading MMX registers
546 in SImode and DImode */
547 {3, 3}, /* cost of storing MMX registers
548 in SImode and DImode */
549 3, /* cost of moving SSE register */
550 {3, 3, 3}, /* cost of loading SSE registers
551 in SImode, DImode and TImode */
552 {3, 3, 3}, /* cost of storing SSE registers
553 in SImode, DImode and TImode */
554 3, /* MMX or SSE register to integer */
555 0, /* size of l1 cache */
556 0, /* size of l2 cache */
557 0, /* size of prefetch block */
558 0, /* number of parallel prefetches */
560 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
562 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
563 COSTS_N_BYTES (2), /* cost of FABS instruction. */
564 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
565 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 1, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 1, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {    /* 386 specific costs */
586 COSTS_N_INSNS (1), /* cost of an add instruction */
587 COSTS_N_INSNS (1), /* cost of a lea instruction */
588 COSTS_N_INSNS (3), /* variable shift costs */
589 COSTS_N_INSNS (2), /* constant shift costs */
590 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
591 COSTS_N_INSNS (6), /* HI */
592 COSTS_N_INSNS (6), /* SI */
593 COSTS_N_INSNS (6), /* DI */
594 COSTS_N_INSNS (6)}, /* other */
595 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
596 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
597 COSTS_N_INSNS (23), /* HI */
598 COSTS_N_INSNS (23), /* SI */
599 COSTS_N_INSNS (23), /* DI */
600 COSTS_N_INSNS (23)}, /* other */
601 COSTS_N_INSNS (3), /* cost of movsx */
602 COSTS_N_INSNS (2), /* cost of movzx */
603 15, /* "large" insn */
605 4, /* cost for loading QImode using movzbl */
606 {2, 4, 2}, /* cost of loading integer registers
607 in QImode, HImode and SImode.
608 Relative to reg-reg move (2). */
609 {2, 4, 2}, /* cost of storing integer registers */
610 2, /* cost of reg,reg fld/fst */
611 {8, 8, 8}, /* cost of loading fp registers
612 in SFmode, DFmode and XFmode */
613 {8, 8, 8}, /* cost of storing fp registers
614 in SFmode, DFmode and XFmode */
615 2, /* cost of moving MMX register */
616 {4, 8}, /* cost of loading MMX registers
617 in SImode and DImode */
618 {4, 8}, /* cost of storing MMX registers
619 in SImode and DImode */
620 2, /* cost of moving SSE register */
621 {4, 8, 16}, /* cost of loading SSE registers
622 in SImode, DImode and TImode */
623 {4, 8, 16}, /* cost of storing SSE registers
624 in SImode, DImode and TImode */
625 3, /* MMX or SSE register to integer */
626 0, /* size of l1 cache */
627 0, /* size of l2 cache */
628 0, /* size of prefetch block */
629 0, /* number of parallel prefetches */
631 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
632 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
633 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
634 COSTS_N_INSNS (22), /* cost of FABS instruction. */
635 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
636 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
641 1, /* scalar_stmt_cost. */
642 1, /* scalar load_cost. */
643 1, /* scalar_store_cost. */
644 1, /* vec_stmt_cost. */
645 1, /* vec_to_scalar_cost. */
646 1, /* scalar_to_vec_cost. */
647 1, /* vec_align_load_cost. */
648 2, /* vec_unalign_load_cost. */
649 1, /* vec_store_cost. */
650 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs i486_cost = {    /* 486 specific costs */
656 COSTS_N_INSNS (1), /* cost of an add instruction */
657 COSTS_N_INSNS (1), /* cost of a lea instruction */
658 COSTS_N_INSNS (3), /* variable shift costs */
659 COSTS_N_INSNS (2), /* constant shift costs */
660 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
661 COSTS_N_INSNS (12), /* HI */
662 COSTS_N_INSNS (12), /* SI */
663 COSTS_N_INSNS (12), /* DI */
664 COSTS_N_INSNS (12)}, /* other */
665 1, /* cost of multiply per each bit set */
666 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
667 COSTS_N_INSNS (40), /* HI */
668 COSTS_N_INSNS (40), /* SI */
669 COSTS_N_INSNS (40), /* DI */
670 COSTS_N_INSNS (40)}, /* other */
671 COSTS_N_INSNS (3), /* cost of movsx */
672 COSTS_N_INSNS (2), /* cost of movzx */
673 15, /* "large" insn */
675 4, /* cost for loading QImode using movzbl */
676 {2, 4, 2}, /* cost of loading integer registers
677 in QImode, HImode and SImode.
678 Relative to reg-reg move (2). */
679 {2, 4, 2}, /* cost of storing integer registers */
680 2, /* cost of reg,reg fld/fst */
681 {8, 8, 8}, /* cost of loading fp registers
682 in SFmode, DFmode and XFmode */
683 {8, 8, 8}, /* cost of storing fp registers
684 in SFmode, DFmode and XFmode */
685 2, /* cost of moving MMX register */
686 {4, 8}, /* cost of loading MMX registers
687 in SImode and DImode */
688 {4, 8}, /* cost of storing MMX registers
689 in SImode and DImode */
690 2, /* cost of moving SSE register */
691 {4, 8, 16}, /* cost of loading SSE registers
692 in SImode, DImode and TImode */
693 {4, 8, 16}, /* cost of storing SSE registers
694 in SImode, DImode and TImode */
695 3, /* MMX or SSE register to integer */
696 4, /* size of l1 cache. 486 has 8kB cache
697 shared for code and data, so 4kB is
698 not really precise. */
699 4, /* size of l2 cache */
700 0, /* size of prefetch block */
701 0, /* number of parallel prefetches */
703 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
704 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
705 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
706 COSTS_N_INSNS (3), /* cost of FABS instruction. */
707 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
708 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
713 1, /* scalar_stmt_cost. */
714 1, /* scalar load_cost. */
715 1, /* scalar_store_cost. */
716 1, /* vec_stmt_cost. */
717 1, /* vec_to_scalar_cost. */
718 1, /* scalar_to_vec_cost. */
719 1, /* vec_align_load_cost. */
720 2, /* vec_unalign_load_cost. */
721 1, /* vec_store_cost. */
722 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs pentium_cost = {
728 COSTS_N_INSNS (1), /* cost of an add instruction */
729 COSTS_N_INSNS (1), /* cost of a lea instruction */
730 COSTS_N_INSNS (4), /* variable shift costs */
731 COSTS_N_INSNS (1), /* constant shift costs */
732 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
733 COSTS_N_INSNS (11), /* HI */
734 COSTS_N_INSNS (11), /* SI */
735 COSTS_N_INSNS (11), /* DI */
736 COSTS_N_INSNS (11)}, /* other */
737 0, /* cost of multiply per each bit set */
738 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
739 COSTS_N_INSNS (25), /* HI */
740 COSTS_N_INSNS (25), /* SI */
741 COSTS_N_INSNS (25), /* DI */
742 COSTS_N_INSNS (25)}, /* other */
743 COSTS_N_INSNS (3), /* cost of movsx */
744 COSTS_N_INSNS (2), /* cost of movzx */
745 8, /* "large" insn */
747 6, /* cost for loading QImode using movzbl */
748 {2, 4, 2}, /* cost of loading integer registers
749 in QImode, HImode and SImode.
750 Relative to reg-reg move (2). */
751 {2, 4, 2}, /* cost of storing integer registers */
752 2, /* cost of reg,reg fld/fst */
753 {2, 2, 6}, /* cost of loading fp registers
754 in SFmode, DFmode and XFmode */
755 {4, 4, 6}, /* cost of storing fp registers
756 in SFmode, DFmode and XFmode */
757 8, /* cost of moving MMX register */
758 {8, 8}, /* cost of loading MMX registers
759 in SImode and DImode */
760 {8, 8}, /* cost of storing MMX registers
761 in SImode and DImode */
762 2, /* cost of moving SSE register */
763 {4, 8, 16}, /* cost of loading SSE registers
764 in SImode, DImode and TImode */
765 {4, 8, 16}, /* cost of storing SSE registers
766 in SImode, DImode and TImode */
767 3, /* MMX or SSE register to integer */
768 8, /* size of l1 cache. */
769 8, /* size of l2 cache */
770 0, /* size of prefetch block */
771 0, /* number of parallel prefetches */
773 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
774 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
775 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
776 COSTS_N_INSNS (1), /* cost of FABS instruction. */
777 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
778 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
783 1, /* scalar_stmt_cost. */
784 1, /* scalar load_cost. */
785 1, /* scalar_store_cost. */
786 1, /* vec_stmt_cost. */
787 1, /* vec_to_scalar_cost. */
788 1, /* scalar_to_vec_cost. */
789 1, /* vec_align_load_cost. */
790 2, /* vec_unalign_load_cost. */
791 1, /* vec_store_cost. */
792 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs pentiumpro_cost = {
798 COSTS_N_INSNS (1), /* cost of an add instruction */
799 COSTS_N_INSNS (1), /* cost of a lea instruction */
800 COSTS_N_INSNS (1), /* variable shift costs */
801 COSTS_N_INSNS (1), /* constant shift costs */
802 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
803 COSTS_N_INSNS (4), /* HI */
804 COSTS_N_INSNS (4), /* SI */
805 COSTS_N_INSNS (4), /* DI */
806 COSTS_N_INSNS (4)}, /* other */
807 0, /* cost of multiply per each bit set */
808 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
809 COSTS_N_INSNS (17), /* HI */
810 COSTS_N_INSNS (17), /* SI */
811 COSTS_N_INSNS (17), /* DI */
812 COSTS_N_INSNS (17)}, /* other */
813 COSTS_N_INSNS (1), /* cost of movsx */
814 COSTS_N_INSNS (1), /* cost of movzx */
815 8, /* "large" insn */
817 2, /* cost for loading QImode using movzbl */
818 {4, 4, 4}, /* cost of loading integer registers
819 in QImode, HImode and SImode.
820 Relative to reg-reg move (2). */
821 {2, 2, 2}, /* cost of storing integer registers */
822 2, /* cost of reg,reg fld/fst */
823 {2, 2, 6}, /* cost of loading fp registers
824 in SFmode, DFmode and XFmode */
825 {4, 4, 6}, /* cost of storing fp registers
826 in SFmode, DFmode and XFmode */
827 2, /* cost of moving MMX register */
828 {2, 2}, /* cost of loading MMX registers
829 in SImode and DImode */
830 {2, 2}, /* cost of storing MMX registers
831 in SImode and DImode */
832 2, /* cost of moving SSE register */
833 {2, 2, 8}, /* cost of loading SSE registers
834 in SImode, DImode and TImode */
835 {2, 2, 8}, /* cost of storing SSE registers
836 in SImode, DImode and TImode */
837 3, /* MMX or SSE register to integer */
838 8, /* size of l1 cache. */
839 256, /* size of l2 cache */
840 32, /* size of prefetch block */
841 6, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (2), /* cost of FABS instruction. */
847 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is
     the way to go.  Rep movsb apparently has a more expensive startup time
     in CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
860 1, /* scalar_stmt_cost. */
861 1, /* scalar load_cost. */
862 1, /* scalar_store_cost. */
863 1, /* vec_stmt_cost. */
864 1, /* vec_to_scalar_cost. */
865 1, /* scalar_to_vec_cost. */
866 1, /* vec_align_load_cost. */
867 2, /* vec_unalign_load_cost. */
868 1, /* vec_store_cost. */
869 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs geode_cost = {
875 COSTS_N_INSNS (1), /* cost of an add instruction */
876 COSTS_N_INSNS (1), /* cost of a lea instruction */
877 COSTS_N_INSNS (2), /* variable shift costs */
878 COSTS_N_INSNS (1), /* constant shift costs */
879 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
880 COSTS_N_INSNS (4), /* HI */
881 COSTS_N_INSNS (7), /* SI */
882 COSTS_N_INSNS (7), /* DI */
883 COSTS_N_INSNS (7)}, /* other */
884 0, /* cost of multiply per each bit set */
885 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
886 COSTS_N_INSNS (23), /* HI */
887 COSTS_N_INSNS (39), /* SI */
888 COSTS_N_INSNS (39), /* DI */
889 COSTS_N_INSNS (39)}, /* other */
890 COSTS_N_INSNS (1), /* cost of movsx */
891 COSTS_N_INSNS (1), /* cost of movzx */
892 8, /* "large" insn */
894 1, /* cost for loading QImode using movzbl */
895 {1, 1, 1}, /* cost of loading integer registers
896 in QImode, HImode and SImode.
897 Relative to reg-reg move (2). */
898 {1, 1, 1}, /* cost of storing integer registers */
899 1, /* cost of reg,reg fld/fst */
900 {1, 1, 1}, /* cost of loading fp registers
901 in SFmode, DFmode and XFmode */
902 {4, 6, 6}, /* cost of storing fp registers
903 in SFmode, DFmode and XFmode */
905 1, /* cost of moving MMX register */
906 {1, 1}, /* cost of loading MMX registers
907 in SImode and DImode */
908 {1, 1}, /* cost of storing MMX registers
909 in SImode and DImode */
910 1, /* cost of moving SSE register */
911 {1, 1, 1}, /* cost of loading SSE registers
912 in SImode, DImode and TImode */
913 {1, 1, 1}, /* cost of storing SSE registers
914 in SImode, DImode and TImode */
915 1, /* MMX or SSE register to integer */
916 64, /* size of l1 cache. */
917 128, /* size of l2 cache. */
918 32, /* size of prefetch block */
919 1, /* number of parallel prefetches */
921 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
922 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
923 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
924 COSTS_N_INSNS (1), /* cost of FABS instruction. */
925 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
926 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
931 1, /* scalar_stmt_cost. */
932 1, /* scalar load_cost. */
933 1, /* scalar_store_cost. */
934 1, /* vec_stmt_cost. */
935 1, /* vec_to_scalar_cost. */
936 1, /* scalar_to_vec_cost. */
937 1, /* vec_align_load_cost. */
938 2, /* vec_unalign_load_cost. */
939 1, /* vec_store_cost. */
940 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs k6_cost = {
946 COSTS_N_INSNS (1), /* cost of an add instruction */
947 COSTS_N_INSNS (2), /* cost of a lea instruction */
948 COSTS_N_INSNS (1), /* variable shift costs */
949 COSTS_N_INSNS (1), /* constant shift costs */
950 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
951 COSTS_N_INSNS (3), /* HI */
952 COSTS_N_INSNS (3), /* SI */
953 COSTS_N_INSNS (3), /* DI */
954 COSTS_N_INSNS (3)}, /* other */
955 0, /* cost of multiply per each bit set */
956 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
957 COSTS_N_INSNS (18), /* HI */
958 COSTS_N_INSNS (18), /* SI */
959 COSTS_N_INSNS (18), /* DI */
960 COSTS_N_INSNS (18)}, /* other */
961 COSTS_N_INSNS (2), /* cost of movsx */
962 COSTS_N_INSNS (2), /* cost of movzx */
963 8, /* "large" insn */
965 3, /* cost for loading QImode using movzbl */
966 {4, 5, 4}, /* cost of loading integer registers
967 in QImode, HImode and SImode.
968 Relative to reg-reg move (2). */
969 {2, 3, 2}, /* cost of storing integer registers */
970 4, /* cost of reg,reg fld/fst */
971 {6, 6, 6}, /* cost of loading fp registers
972 in SFmode, DFmode and XFmode */
973 {4, 4, 4}, /* cost of storing fp registers
974 in SFmode, DFmode and XFmode */
975 2, /* cost of moving MMX register */
976 {2, 2}, /* cost of loading MMX registers
977 in SImode and DImode */
978 {2, 2}, /* cost of storing MMX registers
979 in SImode and DImode */
980 2, /* cost of moving SSE register */
981 {2, 2, 8}, /* cost of loading SSE registers
982 in SImode, DImode and TImode */
983 {2, 2, 8}, /* cost of storing SSE registers
984 in SImode, DImode and TImode */
985 6, /* MMX or SSE register to integer */
986 32, /* size of l1 cache. */
987 32, /* size of l2 cache. Some models
988 have integrated l2 cache, but
989 optimizing for k6 is not important
990 enough to worry about that. */
991 32, /* size of prefetch block */
992 1, /* number of parallel prefetches */
994 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
995 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
996 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
997 COSTS_N_INSNS (2), /* cost of FABS instruction. */
998 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
999 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
1004 1, /* scalar_stmt_cost. */
1005 1, /* scalar load_cost. */
1006 1, /* scalar_store_cost. */
1007 1, /* vec_stmt_cost. */
1008 1, /* vec_to_scalar_cost. */
1009 1, /* scalar_to_vec_cost. */
1010 1, /* vec_align_load_cost. */
1011 2, /* vec_unalign_load_cost. */
1012 1, /* vec_store_cost. */
1013 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs athlon_cost = {
1019 COSTS_N_INSNS (1), /* cost of an add instruction */
1020 COSTS_N_INSNS (2), /* cost of a lea instruction */
1021 COSTS_N_INSNS (1), /* variable shift costs */
1022 COSTS_N_INSNS (1), /* constant shift costs */
1023 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1024 COSTS_N_INSNS (5), /* HI */
1025 COSTS_N_INSNS (5), /* SI */
1026 COSTS_N_INSNS (5), /* DI */
1027 COSTS_N_INSNS (5)}, /* other */
1028 0, /* cost of multiply per each bit set */
1029 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1030 COSTS_N_INSNS (26), /* HI */
1031 COSTS_N_INSNS (42), /* SI */
1032 COSTS_N_INSNS (74), /* DI */
1033 COSTS_N_INSNS (74)}, /* other */
1034 COSTS_N_INSNS (1), /* cost of movsx */
1035 COSTS_N_INSNS (1), /* cost of movzx */
1036 8, /* "large" insn */
1038 4, /* cost for loading QImode using movzbl */
1039 {3, 4, 3}, /* cost of loading integer registers
1040 in QImode, HImode and SImode.
1041 Relative to reg-reg move (2). */
1042 {3, 4, 3}, /* cost of storing integer registers */
1043 4, /* cost of reg,reg fld/fst */
1044 {4, 4, 12}, /* cost of loading fp registers
1045 in SFmode, DFmode and XFmode */
1046 {6, 6, 8}, /* cost of storing fp registers
1047 in SFmode, DFmode and XFmode */
1048 2, /* cost of moving MMX register */
1049 {4, 4}, /* cost of loading MMX registers
1050 in SImode and DImode */
1051 {4, 4}, /* cost of storing MMX registers
1052 in SImode and DImode */
1053 2, /* cost of moving SSE register */
1054 {4, 4, 6}, /* cost of loading SSE registers
1055 in SImode, DImode and TImode */
1056 {4, 4, 5}, /* cost of storing SSE registers
1057 in SImode, DImode and TImode */
1058 5, /* MMX or SSE register to integer */
1059 64, /* size of l1 cache. */
1060 256, /* size of l2 cache. */
1061 64, /* size of prefetch block */
1062 6, /* number of parallel prefetches */
1063 5, /* Branch cost */
1064 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1065 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1066 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1067 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1068 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1069 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
1077 1, /* scalar_stmt_cost. */
1078 1, /* scalar load_cost. */
1079 1, /* scalar_store_cost. */
1080 1, /* vec_stmt_cost. */
1081 1, /* vec_to_scalar_cost. */
1082 1, /* scalar_to_vec_cost. */
1083 1, /* vec_align_load_cost. */
1084 2, /* vec_unalign_load_cost. */
1085 1, /* vec_store_cost. */
1086 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs k8_cost = {
1092 COSTS_N_INSNS (1), /* cost of an add instruction */
1093 COSTS_N_INSNS (2), /* cost of a lea instruction */
1094 COSTS_N_INSNS (1), /* variable shift costs */
1095 COSTS_N_INSNS (1), /* constant shift costs */
1096 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1097 COSTS_N_INSNS (4), /* HI */
1098 COSTS_N_INSNS (3), /* SI */
1099 COSTS_N_INSNS (4), /* DI */
1100 COSTS_N_INSNS (5)}, /* other */
1101 0, /* cost of multiply per each bit set */
1102 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1103 COSTS_N_INSNS (26), /* HI */
1104 COSTS_N_INSNS (42), /* SI */
1105 COSTS_N_INSNS (74), /* DI */
1106 COSTS_N_INSNS (74)}, /* other */
1107 COSTS_N_INSNS (1), /* cost of movsx */
1108 COSTS_N_INSNS (1), /* cost of movzx */
1109 8, /* "large" insn */
1111 4, /* cost for loading QImode using movzbl */
1112 {3, 4, 3}, /* cost of loading integer registers
1113 in QImode, HImode and SImode.
1114 Relative to reg-reg move (2). */
1115 {3, 4, 3}, /* cost of storing integer registers */
1116 4, /* cost of reg,reg fld/fst */
1117 {4, 4, 12}, /* cost of loading fp registers
1118 in SFmode, DFmode and XFmode */
1119 {6, 6, 8}, /* cost of storing fp registers
1120 in SFmode, DFmode and XFmode */
1121 2, /* cost of moving MMX register */
1122 {3, 3}, /* cost of loading MMX registers
1123 in SImode and DImode */
1124 {4, 4}, /* cost of storing MMX registers
1125 in SImode and DImode */
1126 2, /* cost of moving SSE register */
1127 {4, 3, 6}, /* cost of loading SSE registers
1128 in SImode, DImode and TImode */
1129 {4, 4, 5}, /* cost of storing SSE registers
1130 in SImode, DImode and TImode */
1131 5, /* MMX or SSE register to integer */
1132 64, /* size of l1 cache. */
1133 512, /* size of l2 cache. */
1134 64, /* size of prefetch block */
1135 /* New AMD processors never drop prefetches; if they cannot be performed
1136 immediately, they are queued. We set number of simultaneous prefetches
1137 to a large constant to reflect this (it probably is not a good idea not
1138 to limit number of prefetches at all, as their execution also takes some
1140 100, /* number of parallel prefetches */
1141 3, /* Branch cost */
1142 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1143 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1144 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1145 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1146 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1147 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1156 4, /* scalar_stmt_cost. */
1157 2, /* scalar load_cost. */
1158 2, /* scalar_store_cost. */
1159 5, /* vec_stmt_cost. */
1160 0, /* vec_to_scalar_cost. */
1161 2, /* scalar_to_vec_cost. */
1162 2, /* vec_align_load_cost. */
1163 3, /* vec_unalign_load_cost. */
1164 3, /* vec_store_cost. */
1165 3, /* cond_taken_branch_cost. */
  2,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs amdfam10_cost = {
1170 COSTS_N_INSNS (1), /* cost of an add instruction */
1171 COSTS_N_INSNS (2), /* cost of a lea instruction */
1172 COSTS_N_INSNS (1), /* variable shift costs */
1173 COSTS_N_INSNS (1), /* constant shift costs */
1174 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1175 COSTS_N_INSNS (4), /* HI */
1176 COSTS_N_INSNS (3), /* SI */
1177 COSTS_N_INSNS (4), /* DI */
1178 COSTS_N_INSNS (5)}, /* other */
1179 0, /* cost of multiply per each bit set */
1180 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1181 COSTS_N_INSNS (35), /* HI */
1182 COSTS_N_INSNS (51), /* SI */
1183 COSTS_N_INSNS (83), /* DI */
1184 COSTS_N_INSNS (83)}, /* other */
1185 COSTS_N_INSNS (1), /* cost of movsx */
1186 COSTS_N_INSNS (1), /* cost of movzx */
1187 8, /* "large" insn */
1189 4, /* cost for loading QImode using movzbl */
1190 {3, 4, 3}, /* cost of loading integer registers
1191 in QImode, HImode and SImode.
1192 Relative to reg-reg move (2). */
1193 {3, 4, 3}, /* cost of storing integer registers */
1194 4, /* cost of reg,reg fld/fst */
1195 {4, 4, 12}, /* cost of loading fp registers
1196 in SFmode, DFmode and XFmode */
1197 {6, 6, 8}, /* cost of storing fp registers
1198 in SFmode, DFmode and XFmode */
1199 2, /* cost of moving MMX register */
1200 {3, 3}, /* cost of loading MMX registers
1201 in SImode and DImode */
1202 {4, 4}, /* cost of storing MMX registers
1203 in SImode and DImode */
1204 2, /* cost of moving SSE register */
1205 {4, 4, 3}, /* cost of loading SSE registers
1206 in SImode, DImode and TImode */
1207 {4, 4, 5}, /* cost of storing SSE registers
1208 in SImode, DImode and TImode */
1209 3, /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1 */
1218 64, /* size of l1 cache. */
1219 512, /* size of l2 cache. */
1220 64, /* size of prefetch block */
1221 /* New AMD processors never drop prefetches; if they cannot be performed
1222 immediately, they are queued. We set number of simultaneous prefetches
1223 to a large constant to reflect this (it probably is not a good idea not
1224 to limit number of prefetches at all, as their execution also takes some
1226 100, /* number of parallel prefetches */
1227 2, /* Branch cost */
1228 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1229 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1230 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1231 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1232 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1233 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1243 4, /* scalar_stmt_cost. */
1244 2, /* scalar load_cost. */
1245 2, /* scalar_store_cost. */
1246 6, /* vec_stmt_cost. */
1247 0, /* vec_to_scalar_cost. */
1248 2, /* scalar_to_vec_cost. */
1249 2, /* vec_align_load_cost. */
1250 2, /* vec_unalign_load_cost. */
1251 2, /* vec_store_cost. */
1252 2, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs bdver1_cost = {
1257 COSTS_N_INSNS (1), /* cost of an add instruction */
1258 COSTS_N_INSNS (1), /* cost of a lea instruction */
1259 COSTS_N_INSNS (1), /* variable shift costs */
1260 COSTS_N_INSNS (1), /* constant shift costs */
1261 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1262 COSTS_N_INSNS (4), /* HI */
1263 COSTS_N_INSNS (4), /* SI */
1264 COSTS_N_INSNS (6), /* DI */
1265 COSTS_N_INSNS (6)}, /* other */
1266 0, /* cost of multiply per each bit set */
1267 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1268 COSTS_N_INSNS (35), /* HI */
1269 COSTS_N_INSNS (51), /* SI */
1270 COSTS_N_INSNS (83), /* DI */
1271 COSTS_N_INSNS (83)}, /* other */
1272 COSTS_N_INSNS (1), /* cost of movsx */
1273 COSTS_N_INSNS (1), /* cost of movzx */
1274 8, /* "large" insn */
1276 4, /* cost for loading QImode using movzbl */
1277 {5, 5, 4}, /* cost of loading integer registers
1278 in QImode, HImode and SImode.
1279 Relative to reg-reg move (2). */
1280 {4, 4, 4}, /* cost of storing integer registers */
1281 2, /* cost of reg,reg fld/fst */
1282 {5, 5, 12}, /* cost of loading fp registers
1283 in SFmode, DFmode and XFmode */
1284 {4, 4, 8}, /* cost of storing fp registers
1285 in SFmode, DFmode and XFmode */
1286 2, /* cost of moving MMX register */
1287 {4, 4}, /* cost of loading MMX registers
1288 in SImode and DImode */
1289 {4, 4}, /* cost of storing MMX registers
1290 in SImode and DImode */
1291 2, /* cost of moving SSE register */
1292 {4, 4, 4}, /* cost of loading SSE registers
1293 in SImode, DImode and TImode */
1294 {4, 4, 4}, /* cost of storing SSE registers
1295 in SImode, DImode and TImode */
1296 2, /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1 */
1305 16, /* size of l1 cache. */
1306 2048, /* size of l2 cache. */
1307 64, /* size of prefetch block */
1308 /* New AMD processors never drop prefetches; if they cannot be performed
1309 immediately, they are queued. We set number of simultaneous prefetches
1310 to a large constant to reflect this (it probably is not a good idea not
1311 to limit number of prefetches at all, as their execution also takes some
1313 100, /* number of parallel prefetches */
1314 2, /* Branch cost */
1315 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1316 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1317 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1318 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1319 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1320 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1330 6, /* scalar_stmt_cost. */
1331 4, /* scalar load_cost. */
1332 4, /* scalar_store_cost. */
1333 6, /* vec_stmt_cost. */
1334 0, /* vec_to_scalar_cost. */
1335 2, /* scalar_to_vec_cost. */
1336 4, /* vec_align_load_cost. */
1337 4, /* vec_unalign_load_cost. */
1338 4, /* vec_store_cost. */
1339 2, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs bdver2_cost = {
1344 COSTS_N_INSNS (1), /* cost of an add instruction */
1345 COSTS_N_INSNS (1), /* cost of a lea instruction */
1346 COSTS_N_INSNS (1), /* variable shift costs */
1347 COSTS_N_INSNS (1), /* constant shift costs */
1348 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1349 COSTS_N_INSNS (4), /* HI */
1350 COSTS_N_INSNS (4), /* SI */
1351 COSTS_N_INSNS (6), /* DI */
1352 COSTS_N_INSNS (6)}, /* other */
1353 0, /* cost of multiply per each bit set */
1354 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1355 COSTS_N_INSNS (35), /* HI */
1356 COSTS_N_INSNS (51), /* SI */
1357 COSTS_N_INSNS (83), /* DI */
1358 COSTS_N_INSNS (83)}, /* other */
1359 COSTS_N_INSNS (1), /* cost of movsx */
1360 COSTS_N_INSNS (1), /* cost of movzx */
1361 8, /* "large" insn */
1363 4, /* cost for loading QImode using movzbl */
1364 {5, 5, 4}, /* cost of loading integer registers
1365 in QImode, HImode and SImode.
1366 Relative to reg-reg move (2). */
1367 {4, 4, 4}, /* cost of storing integer registers */
1368 2, /* cost of reg,reg fld/fst */
1369 {5, 5, 12}, /* cost of loading fp registers
1370 in SFmode, DFmode and XFmode */
1371 {4, 4, 8}, /* cost of storing fp registers
1372 in SFmode, DFmode and XFmode */
1373 2, /* cost of moving MMX register */
1374 {4, 4}, /* cost of loading MMX registers
1375 in SImode and DImode */
1376 {4, 4}, /* cost of storing MMX registers
1377 in SImode and DImode */
1378 2, /* cost of moving SSE register */
1379 {4, 4, 4}, /* cost of loading SSE registers
1380 in SImode, DImode and TImode */
1381 {4, 4, 4}, /* cost of storing SSE registers
1382 in SImode, DImode and TImode */
1383 2, /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1 */
1392 16, /* size of l1 cache. */
1393 2048, /* size of l2 cache. */
1394 64, /* size of prefetch block */
1395 /* New AMD processors never drop prefetches; if they cannot be performed
1396 immediately, they are queued. We set number of simultaneous prefetches
1397 to a large constant to reflect this (it probably is not a good idea not
1398 to limit number of prefetches at all, as their execution also takes some
1400 100, /* number of parallel prefetches */
1401 2, /* Branch cost */
1402 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1403 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1404 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1405 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1406 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1407 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1417 6, /* scalar_stmt_cost. */
1418 4, /* scalar load_cost. */
1419 4, /* scalar_store_cost. */
1420 6, /* vec_stmt_cost. */
1421 0, /* vec_to_scalar_cost. */
1422 2, /* scalar_to_vec_cost. */
1423 4, /* vec_align_load_cost. */
1424 4, /* vec_unalign_load_cost. */
1425 4, /* vec_store_cost. */
1426 2, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs btver1_cost = {
1431 COSTS_N_INSNS (1), /* cost of an add instruction */
1432 COSTS_N_INSNS (2), /* cost of a lea instruction */
1433 COSTS_N_INSNS (1), /* variable shift costs */
1434 COSTS_N_INSNS (1), /* constant shift costs */
1435 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1436 COSTS_N_INSNS (4), /* HI */
1437 COSTS_N_INSNS (3), /* SI */
1438 COSTS_N_INSNS (4), /* DI */
1439 COSTS_N_INSNS (5)}, /* other */
1440 0, /* cost of multiply per each bit set */
1441 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1442 COSTS_N_INSNS (35), /* HI */
1443 COSTS_N_INSNS (51), /* SI */
1444 COSTS_N_INSNS (83), /* DI */
1445 COSTS_N_INSNS (83)}, /* other */
1446 COSTS_N_INSNS (1), /* cost of movsx */
1447 COSTS_N_INSNS (1), /* cost of movzx */
1448 8, /* "large" insn */
1450 4, /* cost for loading QImode using movzbl */
1451 {3, 4, 3}, /* cost of loading integer registers
1452 in QImode, HImode and SImode.
1453 Relative to reg-reg move (2). */
1454 {3, 4, 3}, /* cost of storing integer registers */
1455 4, /* cost of reg,reg fld/fst */
1456 {4, 4, 12}, /* cost of loading fp registers
1457 in SFmode, DFmode and XFmode */
1458 {6, 6, 8}, /* cost of storing fp registers
1459 in SFmode, DFmode and XFmode */
1460 2, /* cost of moving MMX register */
1461 {3, 3}, /* cost of loading MMX registers
1462 in SImode and DImode */
1463 {4, 4}, /* cost of storing MMX registers
1464 in SImode and DImode */
1465 2, /* cost of moving SSE register */
1466 {4, 4, 3}, /* cost of loading SSE registers
1467 in SImode, DImode and TImode */
1468 {4, 4, 5}, /* cost of storing SSE registers
1469 in SImode, DImode and TImode */
1470 3, /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1 */
1479 32, /* size of l1 cache. */
1480 512, /* size of l2 cache. */
1481 64, /* size of prefetch block */
1482 100, /* number of parallel prefetches */
1483 2, /* Branch cost */
1484 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1485 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1486 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1487 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1488 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1489 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1499 4, /* scalar_stmt_cost. */
1500 2, /* scalar load_cost. */
1501 2, /* scalar_store_cost. */
1502 6, /* vec_stmt_cost. */
1503 0, /* vec_to_scalar_cost. */
1504 2, /* scalar_to_vec_cost. */
1505 2, /* vec_align_load_cost. */
1506 2, /* vec_unalign_load_cost. */
1507 2, /* vec_store_cost. */
1508 2, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs btver2_cost = {
1513 COSTS_N_INSNS (1), /* cost of an add instruction */
1514 COSTS_N_INSNS (2), /* cost of a lea instruction */
1515 COSTS_N_INSNS (1), /* variable shift costs */
1516 COSTS_N_INSNS (1), /* constant shift costs */
1517 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1518 COSTS_N_INSNS (4), /* HI */
1519 COSTS_N_INSNS (3), /* SI */
1520 COSTS_N_INSNS (4), /* DI */
1521 COSTS_N_INSNS (5)}, /* other */
1522 0, /* cost of multiply per each bit set */
1523 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1524 COSTS_N_INSNS (35), /* HI */
1525 COSTS_N_INSNS (51), /* SI */
1526 COSTS_N_INSNS (83), /* DI */
1527 COSTS_N_INSNS (83)}, /* other */
1528 COSTS_N_INSNS (1), /* cost of movsx */
1529 COSTS_N_INSNS (1), /* cost of movzx */
1530 8, /* "large" insn */
1532 4, /* cost for loading QImode using movzbl */
1533 {3, 4, 3}, /* cost of loading integer registers
1534 in QImode, HImode and SImode.
1535 Relative to reg-reg move (2). */
1536 {3, 4, 3}, /* cost of storing integer registers */
1537 4, /* cost of reg,reg fld/fst */
1538 {4, 4, 12}, /* cost of loading fp registers
1539 in SFmode, DFmode and XFmode */
1540 {6, 6, 8}, /* cost of storing fp registers
1541 in SFmode, DFmode and XFmode */
1542 2, /* cost of moving MMX register */
1543 {3, 3}, /* cost of loading MMX registers
1544 in SImode and DImode */
1545 {4, 4}, /* cost of storing MMX registers
1546 in SImode and DImode */
1547 2, /* cost of moving SSE register */
1548 {4, 4, 3}, /* cost of loading SSE registers
1549 in SImode, DImode and TImode */
1550 {4, 4, 5}, /* cost of storing SSE registers
1551 in SImode, DImode and TImode */
1552 3, /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1 */
1561 32, /* size of l1 cache. */
1562 2048, /* size of l2 cache. */
1563 64, /* size of prefetch block */
1564 100, /* number of parallel prefetches */
1565 2, /* Branch cost */
1566 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1567 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1568 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1569 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1570 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1571 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1578 4, /* scalar_stmt_cost. */
1579 2, /* scalar load_cost. */
1580 2, /* scalar_store_cost. */
1581 6, /* vec_stmt_cost. */
1582 0, /* vec_to_scalar_cost. */
1583 2, /* scalar_to_vec_cost. */
1584 2, /* vec_align_load_cost. */
1585 2, /* vec_unalign_load_cost. */
1586 2, /* vec_store_cost. */
1587 2, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs pentium4_cost = {
1593 COSTS_N_INSNS (1), /* cost of an add instruction */
1594 COSTS_N_INSNS (3), /* cost of a lea instruction */
1595 COSTS_N_INSNS (4), /* variable shift costs */
1596 COSTS_N_INSNS (4), /* constant shift costs */
1597 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1598 COSTS_N_INSNS (15), /* HI */
1599 COSTS_N_INSNS (15), /* SI */
1600 COSTS_N_INSNS (15), /* DI */
1601 COSTS_N_INSNS (15)}, /* other */
1602 0, /* cost of multiply per each bit set */
1603 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1604 COSTS_N_INSNS (56), /* HI */
1605 COSTS_N_INSNS (56), /* SI */
1606 COSTS_N_INSNS (56), /* DI */
1607 COSTS_N_INSNS (56)}, /* other */
1608 COSTS_N_INSNS (1), /* cost of movsx */
1609 COSTS_N_INSNS (1), /* cost of movzx */
1610 16, /* "large" insn */
1612 2, /* cost for loading QImode using movzbl */
1613 {4, 5, 4}, /* cost of loading integer registers
1614 in QImode, HImode and SImode.
1615 Relative to reg-reg move (2). */
1616 {2, 3, 2}, /* cost of storing integer registers */
1617 2, /* cost of reg,reg fld/fst */
1618 {2, 2, 6}, /* cost of loading fp registers
1619 in SFmode, DFmode and XFmode */
1620 {4, 4, 6}, /* cost of storing fp registers
1621 in SFmode, DFmode and XFmode */
1622 2, /* cost of moving MMX register */
1623 {2, 2}, /* cost of loading MMX registers
1624 in SImode and DImode */
1625 {2, 2}, /* cost of storing MMX registers
1626 in SImode and DImode */
1627 12, /* cost of moving SSE register */
1628 {12, 12, 12}, /* cost of loading SSE registers
1629 in SImode, DImode and TImode */
1630 {2, 2, 8}, /* cost of storing SSE registers
1631 in SImode, DImode and TImode */
1632 10, /* MMX or SSE register to integer */
1633 8, /* size of l1 cache. */
1634 256, /* size of l2 cache. */
1635 64, /* size of prefetch block */
1636 6, /* number of parallel prefetches */
1637 2, /* Branch cost */
1638 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1639 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1640 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1641 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1642 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1643 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
1649 1, /* scalar_stmt_cost. */
1650 1, /* scalar load_cost. */
1651 1, /* scalar_store_cost. */
1652 1, /* vec_stmt_cost. */
1653 1, /* vec_to_scalar_cost. */
1654 1, /* scalar_to_vec_cost. */
1655 1, /* vec_align_load_cost. */
1656 2, /* vec_unalign_load_cost. */
1657 1, /* vec_store_cost. */
1658 3, /* cond_taken_branch_cost. */
  1,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs nocona_cost = {
1664 COSTS_N_INSNS (1), /* cost of an add instruction */
1665 COSTS_N_INSNS (1), /* cost of a lea instruction */
1666 COSTS_N_INSNS (1), /* variable shift costs */
1667 COSTS_N_INSNS (1), /* constant shift costs */
1668 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1669 COSTS_N_INSNS (10), /* HI */
1670 COSTS_N_INSNS (10), /* SI */
1671 COSTS_N_INSNS (10), /* DI */
1672 COSTS_N_INSNS (10)}, /* other */
1673 0, /* cost of multiply per each bit set */
1674 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1675 COSTS_N_INSNS (66), /* HI */
1676 COSTS_N_INSNS (66), /* SI */
1677 COSTS_N_INSNS (66), /* DI */
1678 COSTS_N_INSNS (66)}, /* other */
1679 COSTS_N_INSNS (1), /* cost of movsx */
1680 COSTS_N_INSNS (1), /* cost of movzx */
1681 16, /* "large" insn */
1682 17, /* MOVE_RATIO */
1683 4, /* cost for loading QImode using movzbl */
1684 {4, 4, 4}, /* cost of loading integer registers
1685 in QImode, HImode and SImode.
1686 Relative to reg-reg move (2). */
1687 {4, 4, 4}, /* cost of storing integer registers */
1688 3, /* cost of reg,reg fld/fst */
1689 {12, 12, 12}, /* cost of loading fp registers
1690 in SFmode, DFmode and XFmode */
1691 {4, 4, 4}, /* cost of storing fp registers
1692 in SFmode, DFmode and XFmode */
1693 6, /* cost of moving MMX register */
1694 {12, 12}, /* cost of loading MMX registers
1695 in SImode and DImode */
1696 {12, 12}, /* cost of storing MMX registers
1697 in SImode and DImode */
1698 6, /* cost of moving SSE register */
1699 {12, 12, 12}, /* cost of loading SSE registers
1700 in SImode, DImode and TImode */
1701 {12, 12, 12}, /* cost of storing SSE registers
1702 in SImode, DImode and TImode */
1703 8, /* MMX or SSE register to integer */
1704 8, /* size of l1 cache. */
1705 1024, /* size of l2 cache. */
1706 128, /* size of prefetch block */
1707 8, /* number of parallel prefetches */
1708 1, /* Branch cost */
1709 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1710 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1711 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1712 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1713 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1714 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1722 1, /* scalar_stmt_cost. */
1723 1, /* scalar load_cost. */
1724 1, /* scalar_store_cost. */
1725 1, /* vec_stmt_cost. */
1726 1, /* vec_to_scalar_cost. */
1727 1, /* scalar_to_vec_cost. */
1728 1, /* vec_align_load_cost. */
1729 2, /* vec_unalign_load_cost. */
1730 1, /* vec_store_cost. */
1731 3, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
1737 COSTS_N_INSNS (1), /* cost of an add instruction */
1738 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1739 COSTS_N_INSNS (1), /* variable shift costs */
1740 COSTS_N_INSNS (1), /* constant shift costs */
1741 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1742 COSTS_N_INSNS (4), /* HI */
1743 COSTS_N_INSNS (3), /* SI */
1744 COSTS_N_INSNS (4), /* DI */
1745 COSTS_N_INSNS (2)}, /* other */
1746 0, /* cost of multiply per each bit set */
1747 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1748 COSTS_N_INSNS (26), /* HI */
1749 COSTS_N_INSNS (42), /* SI */
1750 COSTS_N_INSNS (74), /* DI */
1751 COSTS_N_INSNS (74)}, /* other */
1752 COSTS_N_INSNS (1), /* cost of movsx */
1753 COSTS_N_INSNS (1), /* cost of movzx */
1754 8, /* "large" insn */
1755 17, /* MOVE_RATIO */
1756 4, /* cost for loading QImode using movzbl */
1757 {4, 4, 4}, /* cost of loading integer registers
1758 in QImode, HImode and SImode.
1759 Relative to reg-reg move (2). */
1760 {4, 4, 4}, /* cost of storing integer registers */
1761 4, /* cost of reg,reg fld/fst */
1762 {12, 12, 12}, /* cost of loading fp registers
1763 in SFmode, DFmode and XFmode */
1764 {6, 6, 8}, /* cost of storing fp registers
1765 in SFmode, DFmode and XFmode */
1766 2, /* cost of moving MMX register */
1767 {8, 8}, /* cost of loading MMX registers
1768 in SImode and DImode */
1769 {8, 8}, /* cost of storing MMX registers
1770 in SImode and DImode */
1771 2, /* cost of moving SSE register */
1772 {8, 8, 8}, /* cost of loading SSE registers
1773 in SImode, DImode and TImode */
1774 {8, 8, 8}, /* cost of storing SSE registers
1775 in SImode, DImode and TImode */
1776 5, /* MMX or SSE register to integer */
1777 32, /* size of l1 cache. */
1778 256, /* size of l2 cache. */
1779 64, /* size of prefetch block */
1780 6, /* number of parallel prefetches */
1781 3, /* Branch cost */
1782 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1783 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1784 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1785 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1786 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1787 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1795 1, /* scalar_stmt_cost. */
1796 1, /* scalar load_cost. */
1797 1, /* scalar_store_cost. */
1798 1, /* vec_stmt_cost. */
1799 1, /* vec_to_scalar_cost. */
1800 1, /* scalar_to_vec_cost. */
1801 1, /* vec_align_load_cost. */
1802 2, /* vec_unalign_load_cost. */
1803 1, /* vec_store_cost. */
1804 3, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */

static const
struct processor_costs generic64_cost = {
1811 COSTS_N_INSNS (1), /* cost of an add instruction */
1812 /* On all chips taken into consideration lea is 2 cycles and more. With
1813 this cost however our current implementation of synth_mult results in
1814 use of unnecessary temporary registers causing regression on several
1815 SPECfp benchmarks. */
1816 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1817 COSTS_N_INSNS (1), /* variable shift costs */
1818 COSTS_N_INSNS (1), /* constant shift costs */
1819 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1820 COSTS_N_INSNS (4), /* HI */
1821 COSTS_N_INSNS (3), /* SI */
1822 COSTS_N_INSNS (4), /* DI */
1823 COSTS_N_INSNS (2)}, /* other */
1824 0, /* cost of multiply per each bit set */
1825 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1826 COSTS_N_INSNS (26), /* HI */
1827 COSTS_N_INSNS (42), /* SI */
1828 COSTS_N_INSNS (74), /* DI */
1829 COSTS_N_INSNS (74)}, /* other */
1830 COSTS_N_INSNS (1), /* cost of movsx */
1831 COSTS_N_INSNS (1), /* cost of movzx */
1832 8, /* "large" insn */
1833 17, /* MOVE_RATIO */
1834 4, /* cost for loading QImode using movzbl */
1835 {4, 4, 4}, /* cost of loading integer registers
1836 in QImode, HImode and SImode.
1837 Relative to reg-reg move (2). */
1838 {4, 4, 4}, /* cost of storing integer registers */
1839 4, /* cost of reg,reg fld/fst */
1840 {12, 12, 12}, /* cost of loading fp registers
1841 in SFmode, DFmode and XFmode */
1842 {6, 6, 8}, /* cost of storing fp registers
1843 in SFmode, DFmode and XFmode */
1844 2, /* cost of moving MMX register */
1845 {8, 8}, /* cost of loading MMX registers
1846 in SImode and DImode */
1847 {8, 8}, /* cost of storing MMX registers
1848 in SImode and DImode */
1849 2, /* cost of moving SSE register */
1850 {8, 8, 8}, /* cost of loading SSE registers
1851 in SImode, DImode and TImode */
1852 {8, 8, 8}, /* cost of storing SSE registers
1853 in SImode, DImode and TImode */
1854 5, /* MMX or SSE register to integer */
1855 32, /* size of l1 cache. */
1856 512, /* size of l2 cache. */
1857 64, /* size of prefetch block */
1858 6, /* number of parallel prefetches */
1859 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1860 value is increased to perhaps more appropriate value of 5. */
1861 3, /* Branch cost */
1862 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1863 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1864 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1865 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1866 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1867 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1872 1, /* scalar_stmt_cost. */
1873 1, /* scalar load_cost. */
1874 1, /* scalar_store_cost. */
1875 1, /* vec_stmt_cost. */
1876 1, /* vec_to_scalar_cost. */
1877 1, /* scalar_to_vec_cost. */
1878 1, /* vec_align_load_cost. */
1879 2, /* vec_unalign_load_cost. */
1880 1, /* vec_store_cost. */
1881 3, /* cond_taken_branch_cost. */
1882 1, /* cond_not_taken_branch_cost. */
1885 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1888 struct processor_costs generic32_cost
= {
1889 COSTS_N_INSNS (1), /* cost of an add instruction */
1890 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1891 COSTS_N_INSNS (1), /* variable shift costs */
1892 COSTS_N_INSNS (1), /* constant shift costs */
1893 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1894 COSTS_N_INSNS (4), /* HI */
1895 COSTS_N_INSNS (3), /* SI */
1896 COSTS_N_INSNS (4), /* DI */
1897 COSTS_N_INSNS (2)}, /* other */
1898 0, /* cost of multiply per each bit set */
1899 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1900 COSTS_N_INSNS (26), /* HI */
1901 COSTS_N_INSNS (42), /* SI */
1902 COSTS_N_INSNS (74), /* DI */
1903 COSTS_N_INSNS (74)}, /* other */
1904 COSTS_N_INSNS (1), /* cost of movsx */
1905 COSTS_N_INSNS (1), /* cost of movzx */
1906 8, /* "large" insn */
1907 17, /* MOVE_RATIO */
1908 4, /* cost for loading QImode using movzbl */
1909 {4, 4, 4}, /* cost of loading integer registers
1910 in QImode, HImode and SImode.
1911 Relative to reg-reg move (2). */
1912 {4, 4, 4}, /* cost of storing integer registers */
1913 4, /* cost of reg,reg fld/fst */
1914 {12, 12, 12}, /* cost of loading fp registers
1915 in SFmode, DFmode and XFmode */
1916 {6, 6, 8}, /* cost of storing fp registers
1917 in SFmode, DFmode and XFmode */
1918 2, /* cost of moving MMX register */
1919 {8, 8}, /* cost of loading MMX registers
1920 in SImode and DImode */
1921 {8, 8}, /* cost of storing MMX registers
1922 in SImode and DImode */
1923 2, /* cost of moving SSE register */
1924 {8, 8, 8}, /* cost of loading SSE registers
1925 in SImode, DImode and TImode */
1926 {8, 8, 8}, /* cost of storing SSE registers
1927 in SImode, DImode and TImode */
1928 5, /* MMX or SSE register to integer */
1929 32, /* size of l1 cache. */
1930 256, /* size of l2 cache. */
1931 64, /* size of prefetch block */
1932 6, /* number of parallel prefetches */
1933 3, /* Branch cost */
1934 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1935 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1936 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1937 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1938 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1939 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
1944 1, /* scalar_stmt_cost. */
1945 1, /* scalar load_cost. */
1946 1, /* scalar_store_cost. */
1947 1, /* vec_stmt_cost. */
1948 1, /* vec_to_scalar_cost. */
1949 1, /* scalar_to_vec_cost. */
1950 1, /* vec_align_load_cost. */
1951 2, /* vec_unalign_load_cost. */
1952 1, /* vec_store_cost. */
1953 3, /* cond_taken_branch_cost. */
  1,					/* cond_not_taken_branch_cost.  */
};

/* Set by -mtune.  */
const struct processor_costs *ix86_tune_cost = &pentium_cost;

/* Set by -mtune or -Os.  */
const struct processor_costs *ix86_cost = &pentium_cost;
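/* A note on units, added as a reading aid (not taken from the original
   comments): the COSTS_N_INSNS values in the tables above are expressed in
   the usual RTL cost scale, where COSTS_N_INSNS (1) is the cost of one
   simple instruction.  The stringop_algs entries pair a maximum block size
   with the memcpy/memset strategy to use up to that size; the leading
   algorithm is used when the size is unknown at compile time, and -1 means
   "no upper bound".  For example, under that reading

     {{8, loop}, {24, unrolled_loop}, {2048, rep_prefix_4_byte}, {-1, libcall}}

   selects a simple loop for blocks up to 8 bytes, an unrolled loop up to 24
   bytes, "rep"-prefixed 4-byte moves up to 2048 bytes, and a library call
   for anything larger.  */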
1963 /* Processor feature/optimization bitmasks. */
1964 #define m_386 (1<<PROCESSOR_I386)
1965 #define m_486 (1<<PROCESSOR_I486)
1966 #define m_PENT (1<<PROCESSOR_PENTIUM)
1967 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1968 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1969 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1970 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1971 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1972 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1973 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1974 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1975 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1976 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1977 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1978 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1979 #define m_ATOM (1<<PROCESSOR_ATOM)
1981 #define m_GEODE (1<<PROCESSOR_GEODE)
1982 #define m_K6 (1<<PROCESSOR_K6)
1983 #define m_K6_GEODE (m_K6 | m_GEODE)
1984 #define m_K8 (1<<PROCESSOR_K8)
1985 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1986 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1987 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1988 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1989 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1990 #define m_BDVER (m_BDVER1 | m_BDVER2)
1991 #define m_BTVER (m_BTVER1 | m_BTVER2)
1992 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1993 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
1994 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
1996 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1997 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1999 /* Generic instruction choice should be common subset of supported CPUs
2000 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
2001 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
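/* A minimal sketch, added as a reading aid (assuming ix86_tune holds the
   PROCESSOR_* value selected by -mtune): each entry of the tuning table
   below is a bitmask built from the m_* macros above, and a feature applies
   to the selected CPU when its bit is present, e.g.

     unsigned int tune_mask = 1 << ix86_tune;
     if (initial_ix86_tune_features[X86_TUNE_SCHEDULE] & tune_mask)
       ... the X86_TUNE_SCHEDULE heuristic is enabled for this CPU ...
*/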
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
2010 negatively, so enabling for Generic64 seems like good code size
2011 tradeoff. We can't enable it for 32bit generic because it does not
2012 work well with PPro base chips. */
  m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2018 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
2021 /* X86_TUNE_UNROLL_STRLEN */
2022 m_486
| m_PENT
| m_PPRO
| m_ATOM
| m_CORE2I7
| m_K6
| m_AMD_MULTIPLE
| m_GENERIC
,
2024 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
2025 on simulation result. But after P4 was made, no performance benefit
2026 was observed with branch hints. It also increases the code size.
2027 As a result, icc never generates branch hints. */
2030 /* X86_TUNE_DOUBLE_WITH_ADD */
2033 /* X86_TUNE_USE_SAHF */
2034 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC
,
2036 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
2037 partial dependencies. */
2038 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2040 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
2041 register stalls on Generic32 compilation setting as well. However
2042 in current implementation the partial register stalls are not eliminated
2043 very well - they can be introduced via subregs synthesized by combine
2044 and can happen in caller/callee saving sequences. Because this option
2045 pays back little on PPro based chips and is in conflict with partial reg
2046 dependencies used by Athlon/P4 based chips, it is better to leave it off
2047 for generic32 for now. */
2050 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
2051 m_CORE2I7
| m_GENERIC
,
2053 /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
2054 * on 16-bit immediate moves into memory on Core2 and Corei7. */
2055 m_CORE2I7
| m_GENERIC
,
2057 /* X86_TUNE_USE_HIMODE_FIOP */
2058 m_386
| m_486
| m_K6_GEODE
,
2060 /* X86_TUNE_USE_SIMODE_FIOP */
2061 ~(m_PENT
| m_PPRO
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
),
2063 /* X86_TUNE_USE_MOV0 */
2066 /* X86_TUNE_USE_CLTD */
2067 ~(m_PENT
| m_CORE2I7
| m_ATOM
| m_K6
| m_GENERIC
),
2069 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
2072 /* X86_TUNE_SPLIT_LONG_MOVES */
2075 /* X86_TUNE_READ_MODIFY_WRITE */
2078 /* X86_TUNE_READ_MODIFY */
2081 /* X86_TUNE_PROMOTE_QIMODE */
2082 m_386
| m_486
| m_PENT
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2084 /* X86_TUNE_FAST_PREFIX */
2085 ~(m_386
| m_486
| m_PENT
),
2087 /* X86_TUNE_SINGLE_STRINGOP */
2088 m_386
| m_P4_NOCONA
,
2090 /* X86_TUNE_QIMODE_MATH */
2093 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2094 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2095 might be considered for Generic32 if our scheme for avoiding partial
2096 stalls was more effective. */
2099 /* X86_TUNE_PROMOTE_QI_REGS */
2102 /* X86_TUNE_PROMOTE_HI_REGS */
2105 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2106 over esp addition. */
2107 m_386
| m_486
| m_PENT
| m_PPRO
,
2109 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2110 over esp addition. */
2113 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2114 over esp subtraction. */
2115 m_386
| m_486
| m_PENT
| m_K6_GEODE
,
2117 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2118 over esp subtraction. */
2119 m_PENT
| m_K6_GEODE
,
2121 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2122 for DFmode copies */
2123 ~(m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_ATOM
| m_GENERIC
),
2125 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2126 m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2128 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2129 conflict here in between PPro/Pentium4 based chips that thread 128bit
2130 SSE registers as single units versus K8 based chips that divide SSE
2131 registers to two 64bit halves. This knob promotes all store destinations
2132 to be 128bit to allow register renaming on 128bit SSE units, but usually
2133 results in one extra microop on 64bit SSE units. Experimental results
2134 shows that disabling this option on P4 brings over 20% SPECfp regression,
2135 while enabling it on K8 brings roughly 2.4% regression that can be partly
2136 masked by careful scheduling of moves. */
2137 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMDFAM10
| m_BDVER
| m_GENERIC
,
2139 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2140 m_COREI7
| m_AMDFAM10
| m_BDVER
| m_BTVER
,
2142 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2145 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2149 are resolved on SSE register parts instead of whole registers, so we may
2150 maintain just lower part of scalar values in proper format leaving the
2151 upper part undefined. */
2154 /* X86_TUNE_SSE_TYPELESS_STORES */
2157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2158 m_PPRO
| m_P4_NOCONA
,
2160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2161 m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2163 /* X86_TUNE_PROLOGUE_USING_MOVE */
2164 m_PPRO
| m_CORE2I7
| m_ATOM
| m_ATHLON_K8
| m_GENERIC
,
2166 /* X86_TUNE_EPILOGUE_USING_MOVE */
2167 m_PPRO
| m_CORE2I7
| m_ATOM
| m_ATHLON_K8
| m_GENERIC
,
2169 /* X86_TUNE_SHIFT1 */
2172 /* X86_TUNE_USE_FFREEP */
2175 /* X86_TUNE_INTER_UNIT_MOVES */
2176 ~(m_AMD_MULTIPLE
| m_GENERIC
),
2178 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2179 ~(m_AMDFAM10
| m_BDVER
),
2181 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2182 than 4 branch instructions in the 16 byte window. */
2183 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2185 /* X86_TUNE_SCHEDULE */
2186 m_PENT
| m_PPRO
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2188 /* X86_TUNE_USE_BT */
2189 m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2191 /* X86_TUNE_USE_INCDEC */
2192 ~(m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GENERIC
),
2194 /* X86_TUNE_PAD_RETURNS */
2195 m_CORE2I7
| m_AMD_MULTIPLE
| m_GENERIC
,
2197 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short funtion. */
2200 /* X86_TUNE_EXT_80387_CONSTANTS */
2201 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_ATHLON_K8
| m_GENERIC
,
2203 /* X86_TUNE_SHORTEN_X87_SSE */
2206 /* X86_TUNE_AVOID_VECTOR_DECODE */
2207 m_CORE2I7_64
| m_K8
| m_GENERIC64
,
2209 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
2210 and SImode multiply, but 386 and 486 do HImode multiply faster. */
2213 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2214 vector path on AMD machines. */
2215 m_CORE2I7_64
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
2217 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2219 m_CORE2I7_64
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
2221 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2225 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2226 but one byte longer. */
2229 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2230 operand that cannot be represented using a modRM byte. The XOR
2231 replacement is long decoded, so this split helps here as well. */
2234 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2236 m_CORE2I7
| m_AMDFAM10
| m_GENERIC
,
2238 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2239 from integer to FP. */
2242 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2243 with a subsequent conditional jump instruction into a single
2244 compare-and-branch uop. */
2247 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2248 will impact LEA instruction selection. */
2251 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2255 /* X86_SOFTARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2256 at -O3. For the moment, the prefetching seems badly tuned for Intel
2258 m_K6_GEODE
| m_AMD_MULTIPLE
,
2260 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2261 the auto-vectorizer. */
2264 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2265 during reassociation of integer computation. */
2268 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2269 during reassociation of fp computation. */
};

/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};
static const unsigned int x86_accumulate_outgoing_args
  = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE
    | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_load
  = m_COREI7 | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_store
  = m_COREI7 | m_BDVER | m_GENERIC;
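/* Reading aid (an assumption based on the option names, not part of the
   original comments): these two masks select the CPUs for which
   -mavx256-split-unaligned-load and -mavx256-split-unaligned-store are
   turned on by default, i.e. 256-bit unaligned AVX moves are split into two
   128-bit halves.  A sketch of the expected use when the user did not set
   the flag explicitly:

     if (x86_avx256_split_unaligned_load & (1 << ix86_tune))
       target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
*/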
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
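/* Illustrative note, not from the original comments: these arrays are meant
   to be indexed by the gcc hard register number to obtain the debugger
   register number.  With the 32-bit map above a value living in gcc regno 4
   (%esi) is reported as register 6, while the 64-bit map reports gcc regno 4
   as register 4, matching the SysV x86-64 DWARF numbering for %rsi.  */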
2373 /* Define the register numbers to be used in Dwarf debugging information.
2374 The SVR4 reference port C compiler uses the following register numbers
2375 in its Dwarf output code:
2376 0 for %eax (gcc regno = 0)
2377 1 for %ecx (gcc regno = 2)
2378 2 for %edx (gcc regno = 1)
2379 3 for %ebx (gcc regno = 3)
2380 4 for %esp (gcc regno = 7)
2381 5 for %ebp (gcc regno = 6)
2382 6 for %esi (gcc regno = 4)
2383 7 for %edi (gcc regno = 5)
2384 The following three DWARF register numbers are never generated by
2385 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2386 believes these numbers have these meanings.
2387 8 for %eip (no gcc equivalent)
2388 9 for %eflags (gcc regno = 17)
2389 10 for %trapno (no gcc equivalent)
2390 It is not at all clear how we should number the FP stack registers
2391 for the x86 architecture. If the version of SDB on x86/svr4 were
2392 a bit less brain dead with respect to floating-point then we would
2393 have a precedent to follow with respect to DWARF register numbers
2394 for x86 FP registers, but the SDB on x86/svr4 is so completely
2395 broken with respect to FP registers that it is hardly worth thinking
2396 of it as something to strive for compatibility with.
2397 The version of x86/svr4 SDB I have at the moment does (partially)
2398 seem to believe that DWARF register number 11 is associated with
2399 the x86 register %st(0), but that's about all. Higher DWARF
2400 register numbers don't seem to be associated with anything in
2401 particular, and even for DWARF regno 11, SDB only seems to under-
2402 stand that it should say that a variable lives in %st(0) (when
2403 asked via an `=' command) if we said it was in DWARF regno 11,
2404 but SDB still prints garbage when asked for the value of the
2405 variable in question (via a `/' command).
2406 (Also note that the labels SDB prints for various FP stack regs
2407 when doing an `x' command are all wrong.)
2408 Note that these problems generally don't affect the native SVR4
2409 C compiler because it doesn't allow the use of -O with -g and
2410 because when it is *not* optimizing, it allocates a memory
2411 location for each floating-point variable, and the memory
2412 location is what gets described in the DWARF AT_location
2413 attribute for the variable in question.
2414 Regardless of the severe mental illness of the x86/svr4 SDB, we
2415 do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/

int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
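/* Reading aid (follows directly from the tables above): under the SysV
   x86-64 calling convention the first integer arguments travel in %rdi,
   %rsi, %rdx, %rcx, %r8 and %r9, while the Microsoft ABI uses only %rcx,
   %rdx, %r8 and %r9.  So for a call such as

     long f (long a, long b, long c);

   a SysV caller places a in %rdi, b in %rsi and c in %rdx, whereas an
   MS-ABI caller places a in %rcx, b in %rdx and c in %r8.  */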
/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
2464 /* Structure describing stack frame layout.
2465 Stack grows downward:
2471 saved static chain if ix86_static_chain_on_stack
2473 saved frame pointer if frame_pointer_needed
2474 <- HARD_FRAME_POINTER
2480 <- sse_regs_save_offset
2483 [va_arg registers] |
   [padding2]				|  = to_allocate
  */

struct ix86_frame
{
  int outgoing_arguments_size;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  HOST_WIDE_INT hfp_save_offset;
  HOST_WIDE_INT reg_save_offset;
  HOST_WIDE_INT sse_reg_save_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
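/* Reading aid (an assumption based on the naming convention, not the
   literal override code): these hooks let the rest of the backend emit
   word-size-neutral RTL.  They are expected to be pointed at the mode
   specific generators once the word size is known, roughly

     if (TARGET_64BIT)
       ix86_gen_add3 = gen_adddi3;
     else
       ix86_gen_add3 = gen_addsi3;

   so callers can simply invoke ix86_gen_add3 (dest, src1, src2).  */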
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Fence to use after loop using movnt.  */
tree x86_mfence;

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
static bool ext_80387_constants_init = 0;
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (enum machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
						 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);
enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};
static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
				 const char *, enum fpmath_unit, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
  const struct processor_costs *cost;	/* Processor costs */
  const int align_loop;			/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};

static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {&i386_cost, 4, 3, 4, 3, 4},
  {&i486_cost, 16, 15, 16, 15, 16},
  {&pentium_cost, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 16, 15, 16, 10, 16},
  {&geode_cost, 0, 0, 0, 0, 0},
  {&k6_cost, 32, 7, 32, 7, 32},
  {&athlon_cost, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0},
  {&k8_cost, 16, 7, 16, 7, 16},
  {&nocona_cost, 0, 0, 0, 0, 0},
  /* Core 2 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core 2 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  /* Core i7 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core i7 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&generic32_cost, 16, 7, 16, 7, 16},
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&amdfam10_cost, 32, 24, 32, 7, 32},
  {&bdver1_cost, 32, 24, 32, 7, 32},
  {&bdver2_cost, 32, 24, 32, 7, 32},
  {&btver1_cost, 32, 24, 32, 7, 32},
  {&btver2_cost, 32, 24, 32, 7, 32},
  {&atom_cost, 16, 15, 16, 7, 16}
};
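/* Reading aid (a sketch, not the literal option-override code): the table
   above is indexed by the PROCESSOR_* value chosen for -mtune, and the
   per-CPU alignment columns are intended as fallbacks when the user did not
   pass explicit -falign-* options, along the lines of

     if (align_loops == 0)
       align_loops = processor_target_table[ix86_tune].align_loop;
*/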
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* Return true if a red-zone is in use.  */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */

static char *
ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
		    const char *tune, enum fpmath_unit fpmath,
		    bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option;		/* option string */
    HOST_WIDE_INT mask;		/* isa mask options */
  };
  /* This table is ordered so that options like -msse4.2 that imply
     preceding options while match those first.  */
  static struct ix86_target_opts isa_opts[] =
  {
    { "-mfma4",		OPTION_MASK_ISA_FMA4 },
    { "-mfma",		OPTION_MASK_ISA_FMA },
    { "-mxop",		OPTION_MASK_ISA_XOP },
    { "-mlwp",		OPTION_MASK_ISA_LWP },
    { "-msse4a",	OPTION_MASK_ISA_SSE4A },
    { "-msse4.2",	OPTION_MASK_ISA_SSE4_2 },
    { "-msse4.1",	OPTION_MASK_ISA_SSE4_1 },
    { "-mssse3",	OPTION_MASK_ISA_SSSE3 },
    { "-msse3",		OPTION_MASK_ISA_SSE3 },
    { "-msse2",		OPTION_MASK_ISA_SSE2 },
    { "-msse",		OPTION_MASK_ISA_SSE },
    { "-m3dnow",	OPTION_MASK_ISA_3DNOW },
    { "-m3dnowa",	OPTION_MASK_ISA_3DNOW_A },
    { "-mmmx",		OPTION_MASK_ISA_MMX },
    { "-mabm",		OPTION_MASK_ISA_ABM },
    { "-mbmi",		OPTION_MASK_ISA_BMI },
    { "-mbmi2",		OPTION_MASK_ISA_BMI2 },
    { "-mlzcnt",	OPTION_MASK_ISA_LZCNT },
    { "-mhle",		OPTION_MASK_ISA_HLE },
    { "-mprfchw",	OPTION_MASK_ISA_PRFCHW },
    { "-mtbm",		OPTION_MASK_ISA_TBM },
    { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
    { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
    { "-mcrc32",	OPTION_MASK_ISA_CRC32 },
    { "-maes",		OPTION_MASK_ISA_AES },
    { "-mpclmul",	OPTION_MASK_ISA_PCLMUL },
    { "-mfsgsbase",	OPTION_MASK_ISA_FSGSBASE },
    { "-mrdrnd",	OPTION_MASK_ISA_RDRND },
    { "-mf16c",		OPTION_MASK_ISA_F16C },
    { "-mrtm",		OPTION_MASK_ISA_RTM },
  };
  static struct ix86_target_opts flag_opts[] =
  {
    { "-m128bit-long-double",		MASK_128BIT_LONG_DOUBLE },
    { "-m80387",			MASK_80387 },
    { "-maccumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS },
    { "-malign-double",			MASK_ALIGN_DOUBLE },
    { "-mcld",				MASK_CLD },
    { "-mfp-ret-in-387",		MASK_FLOAT_RETURNS },
    { "-mieee-fp",			MASK_IEEE_FP },
    { "-minline-all-stringops",		MASK_INLINE_ALL_STRINGOPS },
    { "-minline-stringops-dynamically",	MASK_INLINE_STRINGOPS_DYNAMICALLY },
    { "-mms-bitfields",			MASK_MS_BITFIELD_LAYOUT },
    { "-mno-align-stringops",		MASK_NO_ALIGN_STRINGOPS },
    { "-mno-fancy-math-387",		MASK_NO_FANCY_MATH_387 },
    { "-mno-push-args",			MASK_NO_PUSH_ARGS },
    { "-mno-red-zone",			MASK_NO_RED_ZONE },
    { "-momit-leaf-frame-pointer",	MASK_OMIT_LEAF_FRAME_POINTER },
    { "-mrecip",			MASK_RECIP },
    { "-mrtd",				MASK_RTD },
    { "-msseregparm",			MASK_SSEREGPARM },
    { "-mstack-arg-probe",		MASK_STACK_PROBE },
    { "-mtls-direct-seg-refs",		MASK_TLS_DIRECT_SEG_REFS },
    { "-mvect8-ret-in-mem",		MASK_VECT8_RETURNS },
    { "-m8bit-idiv",			MASK_USE_8BIT_IDIV },
    { "-mvzeroupper",			MASK_VZEROUPPER },
    { "-mavx256-split-unaligned-load",	MASK_AVX256_SPLIT_UNALIGNED_LOAD },
    { "-mavx256-split-unaligned-store",	MASK_AVX256_SPLIT_UNALIGNED_STORE },
    { "-mprefer-avx128",		MASK_PREFER_AVX128 },
  };

  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];

  char isa_other[40];
  char target_other[40];
  memset (opts, '\0', sizeof (opts));
  /* Add -march= option.  */
  if (arch)
    {
      opts[num][0] = "-march=";
      opts[num++][1] = arch;
    }

  /* Add -mtune= option.  */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }
2843 /* Add -m32/-m64/-mx32. */
2844 if ((isa
& OPTION_MASK_ISA_64BIT
) != 0)
2846 if ((isa
& OPTION_MASK_ABI_64
) != 0)
2850 isa
&= ~ (OPTION_MASK_ISA_64BIT
2851 | OPTION_MASK_ABI_64
2852 | OPTION_MASK_ABI_X32
);
2856 opts
[num
++][0] = abi
;
  /* Pick out the options in isa options.  */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    {
      if ((isa & isa_opts[i].mask) != 0)
	{
	  opts[num++][0] = isa_opts[i].option;
	  isa &= ~ isa_opts[i].mask;
	}
    }
  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
	       isa);
    }
  /* Add flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    {
      if ((flags & flag_opts[i].mask) != 0)
	{
	  opts[num++][0] = flag_opts[i].option;
	  flags &= ~ flag_opts[i].mask;
	}
    }
  if (flags && add_nl_p)
    {
      opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: %#x)", flags);
    }
2891 /* Add -fpmath= option. */
2894 opts
[num
][0] = "-mfpmath=";
2895 switch ((int) fpmath
)
2898 opts
[num
++][1] = "387";
2902 opts
[num
++][1] = "sse";
2905 case FPMATH_387
| FPMATH_SSE
:
2906 opts
[num
++][1] = "sse+387";
2918 gcc_assert (num
< ARRAY_SIZE (opts
));
2920 /* Size the string. */
2922 sep_len
= (add_nl_p
) ? 3 : 1;
2923 for (i
= 0; i
< num
; i
++)
2926 for (j
= 0; j
< 2; j
++)
2928 len
+= strlen (opts
[i
][j
]);
2931 /* Build the string. */
2932 ret
= ptr
= (char *) xmalloc (len
);
2935 for (i
= 0; i
< num
; i
++)
2939 for (j
= 0; j
< 2; j
++)
2940 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
2947 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2955 for (j
= 0; j
< 2; j
++)
2958 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2960 line_len
+= len2
[j
];
2965 gcc_assert (ret
+ len
>= ptr
);
/* Return true, if profiling code should be emitted before
   prologue. Otherwise it returns false.
   Note: For x86 with "hotfix" it is sorried.  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}

/* Function that is callable from the debugger to print the current
   options.  */
static void
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);
}
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attributes.  */

static void
ix86_option_override_internal (bool main_args_p)
{
  int i;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  const bool ix86_tune_specified = (ix86_tune_string != NULL);
  const char *prefix;
  const char *suffix;
  const char *sw;
3013 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3014 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3015 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3016 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3017 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3018 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3019 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3020 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3021 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3022 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3023 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3024 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3025 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3026 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3027 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3028 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3029 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3030 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3031 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3032 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3033 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3034 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3035 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3036 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3037 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3038 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3039 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3040 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3041 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3042 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3043 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3044 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3045 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3046 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3047 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3048 /* if this reaches 64, need to widen struct pta flags below */
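  /* Reading aid: each PTA_* bit above describes one ISA capability of a
     -march= target.  When an entry of processor_alias_table below matches,
     every PTA bit is translated into the corresponding OPTION_MASK_ISA_*
     flag unless the user already set that ISA explicitly, following the
     pattern used later in this function:

       if (processor_alias_table[i].flags & PTA_SSE2
	   && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
  */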
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned HOST_WIDE_INT flags;
    }
  const processor_alias_table[] =
    {
3059 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
3060 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
3061 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3062 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3063 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
3064 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
3065 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
3066 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
3067 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
3068 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3069 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3070 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
},
3071 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3073 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3075 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3076 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
3077 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
3078 PTA_MMX
|PTA_SSE
| PTA_SSE2
},
3079 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
3080 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
3081 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
3082 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
3083 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
3084 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3085 | PTA_CX16
| PTA_NO_SAHF
},
3086 {"core2", PROCESSOR_CORE2_64
, CPU_CORE2
,
3087 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3088 | PTA_SSSE3
| PTA_CX16
},
3089 {"corei7", PROCESSOR_COREI7_64
, CPU_COREI7
,
3090 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3091 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
},
3092 {"corei7-avx", PROCESSOR_COREI7_64
, CPU_COREI7
,
3093 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3094 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3095 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
},
3096 {"core-avx-i", PROCESSOR_COREI7_64
, CPU_COREI7
,
3097 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3098 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3099 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3100 | PTA_RDRND
| PTA_F16C
},
3101 {"core-avx2", PROCESSOR_COREI7_64
, CPU_COREI7
,
3102 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3103 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
3104 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3105 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
3106 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
},
3107 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
3108 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3109 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
},
3110 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3111 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
|PTA_PREFETCH_SSE
},
3112 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3113 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3114 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3115 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3116 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3117 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3118 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3119 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3120 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3121 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3122 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3123 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3124 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3125 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3126 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
3127 {"k8", PROCESSOR_K8
, CPU_K8
,
3128 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3129 | PTA_SSE2
| PTA_NO_SAHF
},
3130 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3131 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3132 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3133 {"opteron", PROCESSOR_K8
, CPU_K8
,
3134 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3135 | PTA_SSE2
| PTA_NO_SAHF
},
3136 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3137 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3138 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3139 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3140 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3141 | PTA_SSE2
| PTA_NO_SAHF
},
3142 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3143 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3144 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3145 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3146 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3147 | PTA_SSE2
| PTA_NO_SAHF
},
3148 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3149 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3150 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3151 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3152 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3153 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3154 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3155 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3156 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3157 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3158 | PTA_XOP
| PTA_LWP
},
3159 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3160 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3161 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3162 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3163 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3165 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
3166 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3167 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
},
3168 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3169 PTA_HLE
/* flags are only used for -march switch. */ },
3170 {"btver2", PROCESSOR_BTVER2
, CPU_GENERIC64
,
3171 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3172 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
3173 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3174 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
},
3175 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3177 | PTA_HLE
/* flags are only used for -march switch. */ },
    };

  /* -mrecip options.  */
  static struct
    {
      const char *string;		/* option name */
      unsigned int mask;		/* mask bits to set */
    }
  const recip_options[] =
    {
      { "all",       RECIP_MASK_ALL },
      { "none",      RECIP_MASK_NONE },
      { "div",       RECIP_MASK_DIV },
      { "sqrt",      RECIP_MASK_SQRT },
      { "vec-div",   RECIP_MASK_VEC_DIV },
      { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
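  /* Reading aid (a sketch; the actual parsing happens further down in this
     function): the -mrecip=opt list is expected to be matched against
     recip_options by name, OR-ing the associated mask bits into the set of
     reciprocal approximations the compiler may use, conceptually

       if (!strcmp (q, recip_options[i].string))
	 recip_mask |= recip_options[i].mask;

     where q is a hypothetical local naming one comma-separated element of
     the -mrecip= argument.  */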
3198 /* Set up prefix/suffix so the error messages refer to either the command
3199 line argument, or the attribute(target). */
3208 prefix
= "option(\"";
3213 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3214 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3215 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT
)
3216 ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
3217 #ifdef TARGET_BI_ARCH
3220 #if TARGET_BI_ARCH == 1
3221 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3222 is on and OPTION_MASK_ABI_X32 is off. We turn off
3223 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3226 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3228 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3229 on and OPTION_MASK_ABI_64 is off. We turn off
3230 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3233 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
  if (TARGET_X32)
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_64 for TARGET_X32.  */
      ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      ix86_isa_flags &= ~OPTION_MASK_ABI_64;
    }
  else if (TARGET_LP64)
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_X32 for TARGET_LP64.  */
      ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
    }
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
3261 /* -fPIC is the default for x86_64. */
3262 if (TARGET_MACHO
&& TARGET_64BIT
)
3265 /* Need to check -mtune=generic first. */
3266 if (ix86_tune_string
)
3268 if (!strcmp (ix86_tune_string
, "generic")
3269 || !strcmp (ix86_tune_string
, "i686")
3270 /* As special support for cross compilers we read -mtune=native
3271 as -mtune=generic. With native compilers we won't see the
3272 -mtune=native, as it was changed by the driver. */
3273 || !strcmp (ix86_tune_string
, "native"))
3276 ix86_tune_string
= "generic64";
3278 ix86_tune_string
= "generic32";
3280 /* If this call is for setting the option attribute, allow the
3281 generic32/generic64 that was previously set. */
3282 else if (!main_args_p
3283 && (!strcmp (ix86_tune_string
, "generic32")
3284 || !strcmp (ix86_tune_string
, "generic64")))
3286 else if (!strncmp (ix86_tune_string
, "generic", 7))
3287 error ("bad value (%s) for %stune=%s %s",
3288 ix86_tune_string
, prefix
, suffix
, sw
);
3289 else if (!strcmp (ix86_tune_string
, "x86-64"))
3290 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3291 "%stune=k8%s or %stune=generic%s instead as appropriate",
3292 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3296 if (ix86_arch_string
)
3297 ix86_tune_string
= ix86_arch_string
;
3298 if (!ix86_tune_string
)
3300 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3301 ix86_tune_defaulted
= 1;
3304 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3305 need to use a sensible tune option. */
3306 if (!strcmp (ix86_tune_string
, "generic")
3307 || !strcmp (ix86_tune_string
, "x86-64")
3308 || !strcmp (ix86_tune_string
, "i686"))
3311 ix86_tune_string
= "generic64";
3313 ix86_tune_string
= "generic32";
3317 if (ix86_stringop_alg
== rep_prefix_8_byte
&& !TARGET_64BIT
)
3319 /* rep; movq isn't available in 32-bit code. */
3320 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3321 ix86_stringop_alg
= no_stringop
;
3324 if (!ix86_arch_string
)
3325 ix86_arch_string
= TARGET_64BIT
? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3327 ix86_arch_specified
= 1;
3329 if (global_options_set
.x_ix86_pmode
)
3331 if ((TARGET_LP64
&& ix86_pmode
== PMODE_SI
)
3332 || (!TARGET_64BIT
&& ix86_pmode
== PMODE_DI
))
3333 error ("address mode %qs not supported in the %s bit mode",
3334 TARGET_64BIT
? "short" : "long",
3335 TARGET_64BIT
? "64" : "32");
3338 ix86_pmode
= TARGET_LP64
? PMODE_DI
: PMODE_SI
;
3340 if (!global_options_set
.x_ix86_abi
)
3341 ix86_abi
= DEFAULT_ABI
;
3343 if (global_options_set.x_ix86_cmodel)
3345 switch (ix86_cmodel)
3350 ix86_cmodel = CM_SMALL_PIC;
3352 error ("code model %qs not supported in the %s bit mode",
3359 ix86_cmodel = CM_MEDIUM_PIC;
3361 error ("code model %qs not supported in the %s bit mode",
3363 else if (TARGET_X32)
3364 error ("code model %qs not supported in x32 mode",
3371 ix86_cmodel = CM_LARGE_PIC;
3373 error ("code model %qs not supported in the %s bit mode",
3375 else if (TARGET_X32)
3376 error ("code model %qs not supported in x32 mode",
3382 error ("code model %s does not support PIC mode", "32");
3384 error ("code model %qs not supported in the %s bit mode",
3391 error ("code model %s does not support PIC mode", "kernel");
3392 ix86_cmodel = CM_32;
3395 error ("code model %qs not supported in the %s bit mode",
3405 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3406    use of rip-relative addressing. This eliminates fixups that
3407    would otherwise be needed if this object is to be placed in a
3408    DLL, and is essentially just as efficient as direct addressing. */
3409 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3410 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3411 else if (TARGET_64BIT)
3412 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3414 ix86_cmodel = CM_32;
3416 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3418 error ("-masm=intel not supported in this configuration");
3419 ix86_asm_dialect = ASM_ATT;
3421 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3422 sorry ("%i-bit mode not compiled in",
3423        (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3425 for (i = 0; i < pta_size; i++)
3426 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3428 ix86_schedule = processor_alias_table[i].schedule;
3429 ix86_arch = processor_alias_table[i].processor;
3430 /* Default cpu tuning to the architecture. */
3431 ix86_tune = ix86_arch;
3433 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3434 error ("CPU you selected does not support x86-64 "
3437 if (processor_alias_table[i].flags & PTA_MMX
3438     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3439 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3440 if (processor_alias_table[i].flags & PTA_3DNOW
3441     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3442 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3443 if (processor_alias_table[i].flags & PTA_3DNOW_A
3444     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3445 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3446 if (processor_alias_table[i].flags & PTA_SSE
3447     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3448 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3449 if (processor_alias_table[i].flags & PTA_SSE2
3450     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3451 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3452 if (processor_alias_table[i].flags & PTA_SSE3
3453     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3454 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3455 if (processor_alias_table[i].flags & PTA_SSSE3
3456     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3457 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3458 if (processor_alias_table[i].flags & PTA_SSE4_1
3459     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3460 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3461 if (processor_alias_table[i].flags & PTA_SSE4_2
3462     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3463 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3464 if (processor_alias_table[i].flags & PTA_AVX
3465     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3466 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3467 if (processor_alias_table[i].flags & PTA_AVX2
3468     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3469 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3470 if (processor_alias_table[i].flags & PTA_FMA
3471     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3472 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3473 if (processor_alias_table[i].flags & PTA_SSE4A
3474     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3475 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3476 if (processor_alias_table[i].flags & PTA_FMA4
3477     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3478 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3479 if (processor_alias_table[i].flags & PTA_XOP
3480     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3481 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3482 if (processor_alias_table[i].flags & PTA_LWP
3483     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3484 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3485 if (processor_alias_table[i].flags & PTA_ABM
3486     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3487 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3488 if (processor_alias_table[i].flags & PTA_BMI
3489     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3490 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3491 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3492     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3493 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3494 if (processor_alias_table[i].flags & PTA_TBM
3495     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3496 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3497 if (processor_alias_table[i].flags & PTA_BMI2
3498     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3499 ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3500 if (processor_alias_table[i].flags & PTA_CX16
3501     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3502 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3503 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3504     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3505 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3506 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3507     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3508 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3509 if (processor_alias_table[i].flags & PTA_MOVBE
3510     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3511 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3512 if (processor_alias_table[i].flags & PTA_AES
3513     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3514 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3515 if (processor_alias_table[i].flags & PTA_PCLMUL
3516     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3517 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3518 if (processor_alias_table[i].flags & PTA_FSGSBASE
3519     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3520 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3521 if (processor_alias_table[i].flags & PTA_RDRND
3522     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3523 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3524 if (processor_alias_table[i].flags & PTA_F16C
3525     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3526 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3527 if (processor_alias_table[i].flags & PTA_RTM
3528     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3529 ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3530 if (processor_alias_table[i].flags & PTA_HLE
3531     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3532 ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3533 if (processor_alias_table[i].flags & PTA_PRFCHW
3534     && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3535 ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3536 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3537 x86_prefetch_sse = true;
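/* Illustrative note (not from the original sources): the chain of PTA_*
   tests above is what lets a single -march= selection imply a whole ISA
   set.  For example, a hypothetical table entry whose flags include
   PTA_SSE through PTA_SSE4_2 and PTA_AVX would make

     gcc -march=that-cpu foo.c

   behave like -msse ... -mavx, except for any ISA bit the user set or
   cleared explicitly, which ix86_isa_flags_explicit protects from being
   overridden here.  */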
3542 if (!strcmp (ix86_arch_string, "generic"))
3543 error ("generic CPU can be used only for %stune=%s %s",
3544        prefix, suffix, sw);
3545 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3546 error ("bad value (%s) for %sarch=%s %s",
3547        ix86_arch_string, prefix, suffix, sw);
3549 ix86_arch_mask = 1u << ix86_arch;
3550 for (i = 0; i < X86_ARCH_LAST; ++i)
3551 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3553 for (i = 0; i < pta_size; i++)
3554 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3556 ix86_schedule = processor_alias_table[i].schedule;
3557 ix86_tune = processor_alias_table[i].processor;
3560 if (!(processor_alias_table[i].flags & PTA_64BIT))
3562 if (ix86_tune_defaulted)
3564 ix86_tune_string = "x86-64";
3565 for (i = 0; i < pta_size; i++)
3566 if (! strcmp (ix86_tune_string,
3567               processor_alias_table[i].name))
3569 ix86_schedule = processor_alias_table[i].schedule;
3570 ix86_tune = processor_alias_table[i].processor;
3573 error ("CPU you selected does not support x86-64 "
3579 /* Adjust tuning when compiling for 32-bit ABI. */
3582 case PROCESSOR_GENERIC64:
3583 ix86_tune = PROCESSOR_GENERIC32;
3584 ix86_schedule = CPU_PENTIUMPRO;
3587 case PROCESSOR_CORE2_64:
3588 ix86_tune = PROCESSOR_CORE2_32;
3591 case PROCESSOR_COREI7_64:
3592 ix86_tune = PROCESSOR_COREI7_32;
3599 /* Intel CPUs have always interpreted SSE prefetch instructions as
3600    NOPs; so, we can enable SSE prefetch instructions even when
3601    -mtune (rather than -march) points us to a processor that has them.
3602    However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3603    higher processors. */
3605 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3606 x86_prefetch_sse = true;
3610 if (ix86_tune_specified && i == pta_size)
3611 error ("bad value (%s) for %stune=%s %s",
3612        ix86_tune_string, prefix, suffix, sw);
3614 ix86_tune_mask = 1u << ix86_tune;
3615 for (i = 0; i < X86_TUNE_LAST; ++i)
3616 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3618 #ifndef USE_IX86_FRAME_POINTER
3619 #define USE_IX86_FRAME_POINTER 0
#endif
3622 #ifndef USE_X86_64_FRAME_POINTER
3623 #define USE_X86_64_FRAME_POINTER 0
#endif
3626 /* Set the default values for switches whose default depends on TARGET_64BIT
3627    in case they weren't overwritten by command line options. */
3630 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3631 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3632 if (flag_asynchronous_unwind_tables == 2)
3633 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3634 if (flag_pcc_struct_return == 2)
3635 flag_pcc_struct_return = 0;
3639 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3640 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3641 if (flag_asynchronous_unwind_tables == 2)
3642 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3643 if (flag_pcc_struct_return == 2)
3644 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3647 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3649 ix86_cost = &ix86_size_cost;
3651 ix86_cost = ix86_tune_cost;
3653 /* Arrange to set up i386_stack_locals for all functions. */
3654 init_machine_status = ix86_init_machine_status;
3656 /* Validate -mregparm= value. */
3657 if (global_options_set.x_ix86_regparm)
3660 warning (0, "-mregparm is ignored in 64-bit mode");
3661 if (ix86_regparm > REGPARM_MAX)
3663 error ("-mregparm=%d is not between 0 and %d",
3664        ix86_regparm, REGPARM_MAX);
3669 ix86_regparm = REGPARM_MAX;
3671 /* Default align_* from the processor table. */
3672 if (align_loops == 0)
3674 align_loops = processor_target_table[ix86_tune].align_loop;
3675 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3677 if (align_jumps == 0)
3679 align_jumps = processor_target_table[ix86_tune].align_jump;
3680 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3682 if (align_functions == 0)
3684 align_functions = processor_target_table[ix86_tune].align_func;
3687 /* Provide default for -mbranch-cost= value. */
3688 if (!global_options_set.x_ix86_branch_cost)
3689 ix86_branch_cost = ix86_cost->branch_cost;
3693 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3695 /* Enable by default the SSE and MMX builtins. Do allow the user to
3696    explicitly disable any of these. In particular, disabling SSE and
3697    MMX for kernel code is extremely useful. */
3698 if (!ix86_arch_specified)
3700 ix86_isa_flags |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3701                     | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3704 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3708 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3710 if (!ix86_arch_specified)
3712 ix86_isa_flags |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3714 /* i386 ABI does not specify red zone. It still makes sense to use it
3715    when the programmer takes care to keep the stack from being destroyed. */
3716 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3717 target_flags |= MASK_NO_RED_ZONE;
3720 /* Keep nonleaf frame pointers. */
3721 if (flag_omit_frame_pointer)
3722 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3723 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3724 flag_omit_frame_pointer = 1;
3726 /* If we're doing fast math, we don't care about comparison order
3727    wrt NaNs. This lets us use a shorter comparison sequence. */
3728 if (flag_finite_math_only)
3729 target_flags &= ~MASK_IEEE_FP;
3731 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3732    since the insns won't need emulation. */
3733 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3734 target_flags &= ~MASK_NO_FANCY_MATH_387;
3736 /* Likewise, if the target doesn't have a 387, or we've specified
3737    software floating point, don't use 387 inline intrinsics. */
3739 target_flags |= MASK_NO_FANCY_MATH_387;
3741 /* Turn on MMX builtins for -msse. */
3743 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3745 /* Enable SSE prefetch. */
3746 if (TARGET_SSE || TARGET_PRFCHW)
3747 x86_prefetch_sse = true;
3749 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3750 if (TARGET_SSE4_2 || TARGET_ABM)
3751 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3753 /* Turn on lzcnt instruction for -mabm. */
3755 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
3757 /* Validate -mpreferred-stack-boundary= value or default it to
3758    PREFERRED_STACK_BOUNDARY_DEFAULT. */
3759 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3760 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3762 int min = (TARGET_64BIT ? (TARGET_SSE ? 4 : 3) : 2);
3763 int max = (TARGET_SEH ? 4 : 12);
3765 if (ix86_preferred_stack_boundary_arg < min
3766     || ix86_preferred_stack_boundary_arg > max)
3769 error ("-mpreferred-stack-boundary is not supported "
3772 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3773        ix86_preferred_stack_boundary_arg, min, max);
3776 ix86_preferred_stack_boundary
3777   = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
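/* Worked example (illustrative, not from the original sources): the option
   argument is the log2 of the boundary in bytes, so

     -mpreferred-stack-boundary=4
       => ix86_preferred_stack_boundary = (1 << 4) * BITS_PER_UNIT
                                        = 16 * 8 = 128 bits (16 bytes),

   the usual SSE-friendly alignment; the min/max checks above keep the
   argument within the range the backend can honor.  */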
3780 /* Set the default value for -mstackrealign. */
3781 if (ix86_force_align_arg_pointer == -1)
3782 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3784 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3786 /* Validate -mincoming-stack-boundary= value or default it to
3787    MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3788 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3789 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3791 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3792     || ix86_incoming_stack_boundary_arg > 12)
3793 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3794        ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3797 ix86_user_incoming_stack_boundary
3798   = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3799 ix86_incoming_stack_boundary
3800   = ix86_user_incoming_stack_boundary;
3804 /* Accept -msseregparm only if at least SSE support is enabled. */
3805 if (TARGET_SSEREGPARM
3807 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3809 if (global_options_set.x_ix86_fpmath)
3811 if (ix86_fpmath & FPMATH_SSE)
3815 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3816 ix86_fpmath = FPMATH_387;
3818 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3820 warning (0, "387 instruction set disabled, using SSE arithmetics");
3821 ix86_fpmath = FPMATH_SSE;
3826 ix86_fpmath = TARGET_FPMATH_DEFAULT;
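/* Illustrative note (not from the original sources): the checks above mean
   that, for example,

     gcc -m32 -mno-sse -mfpmath=sse foo.c

   warns "SSE instruction set disabled, using 387 arithmetics" and falls
   back to FPMATH_387, whereas -msse2 -mfpmath=sse is accepted as given and
   leaves ix86_fpmath untouched.  */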
3828 /* If the i387 is disabled, then do not return values in it. */
3830 target_flags &= ~MASK_FLOAT_RETURNS;
3832 /* Use external vectorized library in vectorizing intrinsics. */
3833 if (global_options_set.x_ix86_veclibabi_type)
3834 switch (ix86_veclibabi_type)
3836 case ix86_veclibabi_type_svml:
3837 ix86_veclib_handler = ix86_veclibabi_svml;
3840 case ix86_veclibabi_type_acml:
3841 ix86_veclib_handler = ix86_veclibabi_acml;
3848 if ((!USE_IX86_FRAME_POINTER
3849      || (x86_accumulate_outgoing_args & ix86_tune_mask))
3850     && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3852 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3854 /* ??? Unwind info is not correct around the CFG unless either a frame
3855    pointer is present or M_A_O_A is set. Fixing this requires rewriting
3856    unwind info generation to be aware of the CFG and propagating states
3858 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3859      || flag_exceptions || flag_non_call_exceptions)
3860     && flag_omit_frame_pointer
3861     && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3863 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3864 warning (0, "unwind tables currently require either a frame pointer "
3865            "or %saccumulate-outgoing-args%s for correctness",
3867 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3870 /* If stack probes are required, the space used for large function
3871    arguments on the stack must also be probed, so enable
3872    -maccumulate-outgoing-args so this happens in the prologue. */
3873 if (TARGET_STACK_PROBE
3874     && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3876 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3877 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3878            "for correctness", prefix, suffix);
3879 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3882 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3885 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3886 p = strchr (internal_label_prefix, 'X');
3887 internal_label_prefix_len = p - internal_label_prefix;
3891 /* When scheduling description is not available, disable scheduler pass
3892    so it won't slow down the compilation and make x87 code slower. */
3893 if (!TARGET_SCHEDULE)
3894 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3896 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3897                        ix86_tune_cost->simultaneous_prefetches,
3898                        global_options.x_param_values,
3899                        global_options_set.x_param_values);
3900 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3901                        ix86_tune_cost->prefetch_block,
3902                        global_options.x_param_values,
3903                        global_options_set.x_param_values);
3904 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3905                        ix86_tune_cost->l1_cache_size,
3906                        global_options.x_param_values,
3907                        global_options_set.x_param_values);
3908 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
3909                        ix86_tune_cost->l2_cache_size,
3910                        global_options.x_param_values,
3911                        global_options_set.x_param_values);
3913 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3914 if (flag_prefetch_loop_arrays < 0
3917     && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3918 flag_prefetch_loop_arrays = 1;
3920 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3921    can be optimized to ap = __builtin_next_arg (0). */
3922 if (!TARGET_64BIT && !flag_split_stack)
3923 targetm.expand_builtin_va_start = NULL;
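/* Illustrative sketch (not from the original sources): with the plain
   'char *' va_list used here, a variadic callee such as

     #include <stdarg.h>
     int sum (int n, ...)
     {
       va_list ap;
       va_start (ap, n);   -- just ap = __builtin_next_arg (n)
       int s = 0;
       for (int i = 0; i < n; i++)
         s += va_arg (ap, int);
       va_end (ap);
       return s;
     }

   needs no target-specific va_start expansion, which is why the hook is
   cleared above for !TARGET_64BIT.  */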
3927 ix86_gen_leave = gen_leave_rex64;
3928 if (Pmode == DImode)
3930 ix86_gen_monitor = gen_sse3_monitor64_di;
3931 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
3932 ix86_gen_tls_local_dynamic_base_64
3933   = gen_tls_local_dynamic_base_64_di;
3937 ix86_gen_monitor = gen_sse3_monitor64_si;
3938 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
3939 ix86_gen_tls_local_dynamic_base_64
3940   = gen_tls_local_dynamic_base_64_si;
3945 ix86_gen_leave = gen_leave;
3946 ix86_gen_monitor = gen_sse3_monitor;
3949 if (Pmode == DImode)
3951 ix86_gen_add3 = gen_adddi3;
3952 ix86_gen_sub3 = gen_subdi3;
3953 ix86_gen_sub3_carry = gen_subdi3_carry;
3954 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3955 ix86_gen_andsp = gen_anddi3;
3956 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3957 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3958 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3962 ix86_gen_add3 = gen_addsi3;
3963 ix86_gen_sub3 = gen_subsi3;
3964 ix86_gen_sub3_carry = gen_subsi3_carry;
3965 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3966 ix86_gen_andsp = gen_andsi3;
3967 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3968 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3969 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3973 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3975 target_flags |= MASK_CLD & ~target_flags_explicit;
3978 if (!TARGET_64BIT && flag_pic)
3980 if (flag_fentry > 0)
3981 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3985 else if (TARGET_SEH)
3987 if (flag_fentry == 0)
3988 sorry ("-mno-fentry isn%'t compatible with SEH");
3991 else if (flag_fentry < 0)
3993 #if defined(PROFILE_BEFORE_PROLOGUE)
4002 /* When not optimizing for size, enable vzeroupper optimization for
4003    TARGET_AVX with -fexpensive-optimizations and split 32-byte
4004    AVX unaligned load/store. */
4007 if (flag_expensive_optimizations
4008     && !(target_flags_explicit & MASK_VZEROUPPER))
4009 target_flags |= MASK_VZEROUPPER;
4010 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
4011     && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4012 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4013 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
4014     && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4015 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4016 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
4017 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
4018 target_flags |= MASK_PREFER_AVX128;
4023 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4024 target_flags &= ~MASK_VZEROUPPER;
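/* Illustrative note (not from the original sources): with
   MASK_AVX256_SPLIT_UNALIGNED_LOAD in effect, an unaligned 256-bit load
   that would otherwise be one vmovups of a %ymm register is emitted
   roughly as

     vmovups     (mem), %xmm0
     vinsertf128 $1, 16(mem), %ymm0, %ymm0

   i.e. two 128-bit halves, which is cheaper on the tunings that set
   x86_avx256_split_unaligned_load; stores are split analogously.  */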
4027 if (ix86_recip_name)
4029 char *p = ASTRDUP (ix86_recip_name);
4031 unsigned int mask, i;
4034 while ((q = strtok (p, ",")) != NULL)
4045 if (!strcmp (q, "default"))
4046 mask = RECIP_MASK_ALL;
4049 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4050 if (!strcmp (q, recip_options[i].string))
4052 mask = recip_options[i].mask;
4056 if (i == ARRAY_SIZE (recip_options))
4058 error ("unknown option for -mrecip=%s", q);
4060 mask = RECIP_MASK_NONE;
4064 recip_mask_explicit |= mask;
4066 recip_mask &= ~mask;
4073 recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
4074 else if (target_flags_explicit & MASK_RECIP)
4075 recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
4077 /* Save the initial options in case the user does function specific options. */
4080 target_option_default_node = target_option_current_node
4081   = build_target_option_node ();
4084 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
4087 function_pass_avx256_p (const_rtx val
)
4092 if (REG_P (val
) && VALID_AVX256_REG_MODE (GET_MODE (val
)))
4095 if (GET_CODE (val
) == PARALLEL
)
4100 for (i
= XVECLEN (val
, 0) - 1; i
>= 0; i
--)
4102 r
= XVECEXP (val
, 0, i
);
4103 if (GET_CODE (r
) == EXPR_LIST
4105 && REG_P (XEXP (r
, 0))
4106 && (GET_MODE (XEXP (r
, 0)) == OImode
4107 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r
, 0)))))
4115 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4118 ix86_option_override (void)
4120 ix86_option_override_internal (true);
4123 /* Update register usage after having seen the compiler flags. */
4126 ix86_conditional_register_usage (void)
4131 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4133 if (fixed_regs
[i
] > 1)
4134 fixed_regs
[i
] = (fixed_regs
[i
] == (TARGET_64BIT
? 3 : 2));
4135 if (call_used_regs
[i
] > 1)
4136 call_used_regs
[i
] = (call_used_regs
[i
] == (TARGET_64BIT
? 3 : 2));
4139 /* The PIC register, if it exists, is fixed. */
4140 j
= PIC_OFFSET_TABLE_REGNUM
;
4141 if (j
!= INVALID_REGNUM
)
4142 fixed_regs
[j
] = call_used_regs
[j
] = 1;
4144 /* The 64-bit MS_ABI changes the set of call-used registers. */
4145 if (TARGET_64BIT_MS_ABI
)
4147 call_used_regs
[SI_REG
] = 0;
4148 call_used_regs
[DI_REG
] = 0;
4149 call_used_regs
[XMM6_REG
] = 0;
4150 call_used_regs
[XMM7_REG
] = 0;
4151 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
4152 call_used_regs
[i
] = 0;
4155 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4156 other call-clobbered regs for 64-bit. */
4159 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
4161 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4162 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
4163 && call_used_regs
[i
])
4164 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
4167 /* If MMX is disabled, squash the registers. */
4169 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4170 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
4171 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4173 /* If SSE is disabled, squash the registers. */
4175 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4176 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
4177 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4179 /* If the FPU is disabled, squash the registers. */
4180 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
4181 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4182 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
4183 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4185 /* If 32-bit, squash the 64-bit registers. */
4188 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
4190 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
4196 /* Save the current options */
4199 ix86_function_specific_save (struct cl_target_option
*ptr
)
4201 ptr
->arch
= ix86_arch
;
4202 ptr
->schedule
= ix86_schedule
;
4203 ptr
->tune
= ix86_tune
;
4204 ptr
->branch_cost
= ix86_branch_cost
;
4205 ptr
->tune_defaulted
= ix86_tune_defaulted
;
4206 ptr
->arch_specified
= ix86_arch_specified
;
4207 ptr
->x_ix86_isa_flags_explicit
= ix86_isa_flags_explicit
;
4208 ptr
->ix86_target_flags_explicit
= target_flags_explicit
;
4209 ptr
->x_recip_mask_explicit
= recip_mask_explicit
;
4211 /* The fields are char but the variables are not; make sure the
4212 values fit in the fields. */
4213 gcc_assert (ptr
->arch
== ix86_arch
);
4214 gcc_assert (ptr
->schedule
== ix86_schedule
);
4215 gcc_assert (ptr
->tune
== ix86_tune
);
4216 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
4219 /* Restore the current options */
4222 ix86_function_specific_restore (struct cl_target_option
*ptr
)
4224 enum processor_type old_tune
= ix86_tune
;
4225 enum processor_type old_arch
= ix86_arch
;
4226 unsigned int ix86_arch_mask
, ix86_tune_mask
;
4229 ix86_arch
= (enum processor_type
) ptr
->arch
;
4230 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
4231 ix86_tune
= (enum processor_type
) ptr
->tune
;
4232 ix86_branch_cost
= ptr
->branch_cost
;
4233 ix86_tune_defaulted
= ptr
->tune_defaulted
;
4234 ix86_arch_specified
= ptr
->arch_specified
;
4235 ix86_isa_flags_explicit
= ptr
->x_ix86_isa_flags_explicit
;
4236 target_flags_explicit
= ptr
->ix86_target_flags_explicit
;
4237 recip_mask_explicit
= ptr
->x_recip_mask_explicit
;
4239 /* Recreate the arch feature tests if the arch changed */
4240 if (old_arch
!= ix86_arch
)
4242 ix86_arch_mask
= 1u << ix86_arch
;
4243 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
4244 ix86_arch_features
[i
]
4245 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4248 /* Recreate the tune optimization tests */
4249 if (old_tune
!= ix86_tune
)
4251 ix86_tune_mask
= 1u << ix86_tune
;
4252 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
4253 ix86_tune_features
[i
]
4254 = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
4258 /* Print the current options */
4261 ix86_function_specific_print (FILE *file
, int indent
,
4262 struct cl_target_option
*ptr
)
4265 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_target_flags
,
4266 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
4268 fprintf (file
, "%*sarch = %d (%s)\n",
4271 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
4272 ? cpu_names
[ptr
->arch
]
4275 fprintf (file
, "%*stune = %d (%s)\n",
4278 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
4279 ? cpu_names
[ptr
->tune
]
4282 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
4286 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
4287 free (target_string
);
4292 /* Inner function to process the attribute((target(...))), take an argument and
4293 set the current options from the argument. If we have a list, recursively go
4297 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4298 struct gcc_options
*enum_opts_set
)
4303 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4304 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4305 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4306 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4307 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4323 enum ix86_opt_type type
;
4328 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4329 IX86_ATTR_ISA ("abm", OPT_mabm
),
4330 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4331 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4332 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4333 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4334 IX86_ATTR_ISA ("aes", OPT_maes
),
4335 IX86_ATTR_ISA ("avx", OPT_mavx
),
4336 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4337 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4338 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4339 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4340 IX86_ATTR_ISA ("sse", OPT_msse
),
4341 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4342 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4343 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4344 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4345 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4346 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4347 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4348 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4349 IX86_ATTR_ISA ("fma", OPT_mfma
),
4350 IX86_ATTR_ISA ("xop", OPT_mxop
),
4351 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4352 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4353 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4354 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
4355 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
4356 IX86_ATTR_ISA ("hle", OPT_mhle
),
4357 IX86_ATTR_ISA ("prfchw", OPT_mprfchw
),
4360 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4362 /* string options */
4363 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4364 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
4367 IX86_ATTR_YES ("cld",
4371 IX86_ATTR_NO ("fancy-math-387",
4372 OPT_mfancy_math_387
,
4373 MASK_NO_FANCY_MATH_387
),
4375 IX86_ATTR_YES ("ieee-fp",
4379 IX86_ATTR_YES ("inline-all-stringops",
4380 OPT_minline_all_stringops
,
4381 MASK_INLINE_ALL_STRINGOPS
),
4383 IX86_ATTR_YES ("inline-stringops-dynamically",
4384 OPT_minline_stringops_dynamically
,
4385 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4387 IX86_ATTR_NO ("align-stringops",
4388 OPT_mno_align_stringops
,
4389 MASK_NO_ALIGN_STRINGOPS
),
4391 IX86_ATTR_YES ("recip",
4397 /* If this is a list, recurse to get the options. */
4398 if (TREE_CODE (args
) == TREE_LIST
)
4402 for (; args
; args
= TREE_CHAIN (args
))
4403 if (TREE_VALUE (args
)
4404 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4405 p_strings
, enum_opts_set
))
4411 else if (TREE_CODE (args
) != STRING_CST
)
4414 /* Handle multiple arguments separated by commas. */
4415 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4417 while (next_optstr
&& *next_optstr
!= '\0')
4419 char *p
= next_optstr
;
4421 char *comma
= strchr (next_optstr
, ',');
4422 const char *opt_string
;
4423 size_t len
, opt_len
;
4428 enum ix86_opt_type type
= ix86_opt_unknown
;
4434 len
= comma
- next_optstr
;
4435 next_optstr
= comma
+ 1;
4443 /* Recognize no-xxx. */
4444 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4453 /* Find the option. */
4456 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4458 type
= attrs
[i
].type
;
4459 opt_len
= attrs
[i
].len
;
4460 if (ch
== attrs
[i
].string
[0]
4461 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4464 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4467 mask
= attrs
[i
].mask
;
4468 opt_string
= attrs
[i
].string
;
4473 /* Process the option. */
4476 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4480 else if (type
== ix86_opt_isa
)
4482 struct cl_decoded_option decoded
;
4484 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4485 ix86_handle_option (&global_options
, &global_options_set
,
4486 &decoded
, input_location
);
4489 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4491 if (type
== ix86_opt_no
)
4492 opt_set_p
= !opt_set_p
;
4495 target_flags
|= mask
;
4497 target_flags
&= ~mask
;
4500 else if (type
== ix86_opt_str
)
4504 error ("option(\"%s\") was already specified", opt_string
);
4508 p_strings
[opt
] = xstrdup (p
+ opt_len
);
4511 else if (type
== ix86_opt_enum
)
4516 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4518 set_option (&global_options
, enum_opts_set
, opt
, value
,
4519 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4523 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4535 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4538 ix86_valid_target_attribute_tree (tree args
)
4540 const char *orig_arch_string
= ix86_arch_string
;
4541 const char *orig_tune_string
= ix86_tune_string
;
4542 enum fpmath_unit orig_fpmath_set
= global_options_set
.x_ix86_fpmath
;
4543 int orig_tune_defaulted
= ix86_tune_defaulted
;
4544 int orig_arch_specified
= ix86_arch_specified
;
4545 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
4548 struct cl_target_option
*def
4549 = TREE_TARGET_OPTION (target_option_default_node
);
4550 struct gcc_options enum_opts_set
;
4552 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4554 /* Process each of the options on the chain. */
4555 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
,
4559 /* If the changed options are different from the default, rerun
4560 ix86_option_override_internal, and then save the options away.
4561 The string options are attribute options, and will be undone
4562 when we copy the save structure. */
4563 if (ix86_isa_flags
!= def
->x_ix86_isa_flags
4564 || target_flags
!= def
->x_target_flags
4565 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4566 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4567 || enum_opts_set
.x_ix86_fpmath
)
4569 /* If we are using the default tune= or arch=, undo the string assigned,
4570 and use the default. */
4571 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4572 ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4573 else if (!orig_arch_specified
)
4574 ix86_arch_string
= NULL
;
4576 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4577 ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4578 else if (orig_tune_defaulted
)
4579 ix86_tune_string
= NULL
;
4581 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4582 if (enum_opts_set
.x_ix86_fpmath
)
4583 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4584 else if (!TARGET_64BIT
&& TARGET_SSE
)
4586 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4587 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4590 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4591 ix86_option_override_internal (false);
4593 /* Add any builtin functions with the new isa if any. */
4594 ix86_add_new_builtins (ix86_isa_flags
);
4596 /* Save the current options unless we are validating options for
4598 t
= build_target_option_node ();
4600 ix86_arch_string
= orig_arch_string
;
4601 ix86_tune_string
= orig_tune_string
;
4602 global_options_set
.x_ix86_fpmath
= orig_fpmath_set
;
4604 /* Free up memory allocated to hold the strings */
4605 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4606 free (option_strings
[i
]);
4612 /* Hook to validate attribute((target("string"))). */
4615 ix86_valid_target_attribute_p (tree fndecl
,
4616 tree
ARG_UNUSED (name
),
4618 int ARG_UNUSED (flags
))
4620 struct cl_target_option cur_target
;
4622 tree old_optimize
= build_optimization_node ();
4623 tree new_target
, new_optimize
;
4624 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4626 /* If the function changed the optimization levels as well as setting target
4627 options, start with the optimizations specified. */
4628 if (func_optimize
&& func_optimize
!= old_optimize
)
4629 cl_optimization_restore (&global_options
,
4630 TREE_OPTIMIZATION (func_optimize
));
4632 /* The target attributes may also change some optimization flags, so update
4633 the optimization options if necessary. */
4634 cl_target_option_save (&cur_target
, &global_options
);
4635 new_target
= ix86_valid_target_attribute_tree (args
);
4636 new_optimize
= build_optimization_node ();
4643 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4645 if (old_optimize
!= new_optimize
)
4646 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4649 cl_target_option_restore (&global_options
, &cur_target
);
4651 if (old_optimize
!= new_optimize
)
4652 cl_optimization_restore (&global_options
,
4653 TREE_OPTIMIZATION (old_optimize
));
4659 /* Hook to determine if one function can safely inline another. */
4662 ix86_can_inline_p (tree caller
, tree callee
)
4665 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4666 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4668 /* If callee has no option attributes, then it is ok to inline. */
4672 /* If caller has no option attributes, but callee does then it is not ok to
4674 else if (!caller_tree
)
4679 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4680 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4682 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4683    can inline an SSE2 function but an SSE2 function can't inline an SSE4 function. */
4685 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4686 != callee_opts
->x_ix86_isa_flags
)
4689 /* See if we have the same non-isa options. */
4690 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4693 /* See if arch, tune, etc. are the same. */
4694 else if (caller_opts
->arch
!= callee_opts
->arch
)
4697 else if (caller_opts
->tune
!= callee_opts
->tune
)
4700 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4703 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
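/* Illustrative example (not from the original sources) of the ISA-subset
   rule enforced above: inlining is allowed when the callee's target bits
   are contained in the caller's, and rejected the other way round.

     __attribute__((target ("sse2")))
     static inline int callee (int x) { return x + 1; }

     __attribute__((target ("sse4.2")))
     int caller (int x) { return callee (x); }   -- OK, SSE2 is a subset

   Swapping the two target strings would make the callee require SSE4.2
   bits the caller lacks, so ix86_can_inline_p would refuse the inline.  */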
4714 /* Remember the last target of ix86_set_current_function. */
4715 static GTY(()) tree ix86_previous_fndecl
;
4717 /* Establish appropriate back-end context for processing the function
4718 FNDECL. The argument might be NULL to indicate processing at top
4719 level, outside of any function scope. */
4721 ix86_set_current_function (tree fndecl
)
4723 /* Only change the context if the function changes. This hook is called
4724 several times in the course of compiling a function, and we don't want to
4725 slow things down too much or call target_reinit when it isn't safe. */
4726 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4728 tree old_tree
= (ix86_previous_fndecl
4729 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4732 tree new_tree
= (fndecl
4733 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4736 ix86_previous_fndecl
= fndecl
;
4737 if (old_tree
== new_tree
)
4742 cl_target_option_restore (&global_options
,
4743 TREE_TARGET_OPTION (new_tree
));
4749 struct cl_target_option
*def
4750 = TREE_TARGET_OPTION (target_option_current_node
);
4752 cl_target_option_restore (&global_options
, def
);
4759 /* Return true if this goes in large data/bss. */
4762 ix86_in_large_data_p (tree exp
)
4764 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
4767 /* Functions are never large data. */
4768 if (TREE_CODE (exp
) == FUNCTION_DECL
)
4771 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
4773 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
4774 if (strcmp (section
, ".ldata") == 0
4775 || strcmp (section
, ".lbss") == 0)
4781 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
4783 /* If this is an incomplete type with size 0, then we can't put it
4784 in data because it might be too big when completed. */
4785 if (!size
|| size
> ix86_section_threshold
)
4792 /* Switch to the appropriate section for output of DECL.
4793 DECL is either a `VAR_DECL' node or a constant of some sort.
4794 RELOC indicates whether forming the initial value of DECL requires
4795 link-time relocations. */
4797 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
4801 x86_64_elf_select_section (tree decl
, int reloc
,
4802 unsigned HOST_WIDE_INT align
)
4804 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4805 && ix86_in_large_data_p (decl
))
4807 const char *sname
= NULL
;
4808 unsigned int flags
= SECTION_WRITE
;
4809 switch (categorize_decl_for_section (decl
, reloc
))
4814 case SECCAT_DATA_REL
:
4815 sname
= ".ldata.rel";
4817 case SECCAT_DATA_REL_LOCAL
:
4818 sname
= ".ldata.rel.local";
4820 case SECCAT_DATA_REL_RO
:
4821 sname
= ".ldata.rel.ro";
4823 case SECCAT_DATA_REL_RO_LOCAL
:
4824 sname
= ".ldata.rel.ro.local";
4828 flags
|= SECTION_BSS
;
4831 case SECCAT_RODATA_MERGE_STR
:
4832 case SECCAT_RODATA_MERGE_STR_INIT
:
4833 case SECCAT_RODATA_MERGE_CONST
:
4837 case SECCAT_SRODATA
:
4844 /* We don't split these for medium model. Place them into
4845 default sections and hope for the best. */
4850 /* We might get called with string constants, but get_named_section
4851 doesn't like them as they are not DECLs. Also, we need to set
4852 flags in that case. */
4854 return get_section (sname
, flags
, NULL
);
4855 return get_named_section (decl
, sname
, reloc
);
4858 return default_elf_select_section (decl
, reloc
, align
);
4861 /* Build up a unique section name, expressed as a
4862 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4863 RELOC indicates whether the initial value of EXP requires
4864 link-time relocations. */
4866 static void ATTRIBUTE_UNUSED
4867 x86_64_elf_unique_section (tree decl
, int reloc
)
4869 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4870 && ix86_in_large_data_p (decl
))
4872 const char *prefix
= NULL
;
4873 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4874 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
4876 switch (categorize_decl_for_section (decl
, reloc
))
4879 case SECCAT_DATA_REL
:
4880 case SECCAT_DATA_REL_LOCAL
:
4881 case SECCAT_DATA_REL_RO
:
4882 case SECCAT_DATA_REL_RO_LOCAL
:
4883 prefix
= one_only
? ".ld" : ".ldata";
4886 prefix
= one_only
? ".lb" : ".lbss";
4889 case SECCAT_RODATA_MERGE_STR
:
4890 case SECCAT_RODATA_MERGE_STR_INIT
:
4891 case SECCAT_RODATA_MERGE_CONST
:
4892 prefix
= one_only
? ".lr" : ".lrodata";
4894 case SECCAT_SRODATA
:
4901 /* We don't split these for medium model. Place them into
4902 default sections and hope for the best. */
4907 const char *name
, *linkonce
;
4910 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
4911 name
= targetm
.strip_name_encoding (name
);
4913 /* If we're using one_only, then there needs to be a .gnu.linkonce
4914 prefix to the section name. */
4915 linkonce
= one_only
? ".gnu.linkonce" : "";
4917 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
4919 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
4923 default_unique_section (decl
, reloc
);
4926 #ifdef COMMON_ASM_OP
4927 /* This says how to output assembler code to declare an
4928 uninitialized external linkage data object.
4930 For medium model x86-64 we need to use .largecomm opcode for
4933 x86_elf_aligned_common (FILE *file
,
4934 const char *name
, unsigned HOST_WIDE_INT size
,
4937 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4938 && size
> (unsigned int)ix86_section_threshold
)
4939 fputs (".largecomm\t", file
);
4941 fputs (COMMON_ASM_OP
, file
);
4942 assemble_name (file
, name
);
4943 fprintf (file
, "," HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
4944 size
, align
/ BITS_PER_UNIT
);
4948 /* Utility function for targets to use in implementing
4949 ASM_OUTPUT_ALIGNED_BSS. */
4952 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
4953 const char *name
, unsigned HOST_WIDE_INT size
,
4956 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4957 && size
> (unsigned int)ix86_section_threshold
)
4958 switch_to_section (get_named_section (decl
, ".lbss", 0));
4960 switch_to_section (bss_section
);
4961 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
4962 #ifdef ASM_DECLARE_OBJECT_NAME
4963 last_assemble_variable_decl
= decl
;
4964 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
4966 /* Standard thing is just output label for the object. */
4967 ASM_OUTPUT_LABEL (file
, name
);
4968 #endif /* ASM_DECLARE_OBJECT_NAME */
4969 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
4972 /* Decide whether we must probe the stack before any space allocation
4973 on this target. It's essentially TARGET_STACK_PROBE except when
4974 -fstack-check causes the stack to be already probed differently. */
4977 ix86_target_stack_probe (void)
4979 /* Do not probe the stack twice if static stack checking is enabled. */
4980 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
4983 return TARGET_STACK_PROBE
;
4986 /* Decide whether we can make a sibling call to a function. DECL is the
4987 declaration of the function being targeted by the call and EXP is the
4988 CALL_EXPR representing the call. */
4991 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
4993 tree type
, decl_or_type
;
4996 /* If we are generating position-independent code, we cannot sibcall
4997 optimize any indirect call, or a direct call to a global function,
4998 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5002 && (!decl
|| !targetm
.binds_local_p (decl
)))
5005 /* If we need to align the outgoing stack, then sibcalling would
5006 unalign the stack, which may break the called function. */
5007 if (ix86_minimum_incoming_stack_boundary (true)
5008 < PREFERRED_STACK_BOUNDARY
)
5013 decl_or_type
= decl
;
5014 type
= TREE_TYPE (decl
);
5018 /* We're looking at the CALL_EXPR, we need the type of the function. */
5019 type
= CALL_EXPR_FN (exp
); /* pointer expression */
5020 type
= TREE_TYPE (type
); /* pointer type */
5021 type
= TREE_TYPE (type
); /* function type */
5022 decl_or_type
= type
;
5025 /* Check that the return value locations are the same. Like
5026 if we are returning floats on the 80387 register stack, we cannot
5027 make a sibcall from a function that doesn't return a float to a
5028 function that does or, conversely, from a function that does return
5029 a float to a function that doesn't; the necessary stack adjustment
5030 would not be executed. This is also the place we notice
5031 differences in the return value ABI. Note that it is ok for one
5032 of the functions to have void return type as long as the return
5033 value of the other is passed in a register. */
5034 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
5035 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
5037 if (STACK_REG_P (a
) || STACK_REG_P (b
))
5039 if (!rtx_equal_p (a
, b
))
5042 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
5044 /* Disable sibcall if we need to generate vzeroupper after
5046 if (TARGET_VZEROUPPER
5047 && cfun
->machine
->callee_return_avx256_p
5048 && !cfun
->machine
->caller_return_avx256_p
)
5051 else if (!rtx_equal_p (a
, b
))
5056 /* The SYSV ABI has more call-clobbered registers;
5057 disallow sibcalls from MS to SYSV. */
5058 if (cfun
->machine
->call_abi
== MS_ABI
5059 && ix86_function_type_abi (type
) == SYSV_ABI
)
5064 /* If this call is indirect, we'll need to be able to use a
5065 call-clobbered register for the address of the target function.
5066 Make sure that all such registers are not used for passing
5067 parameters. Note that DLLIMPORT functions are indirect. */
5069 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
5071 if (ix86_function_regparm (type
, NULL
) >= 3)
5073 /* ??? Need to count the actual number of registers to be used,
5074 not the possible number of registers. Fix later. */
5080 /* Otherwise okay. That also includes certain types of indirect calls. */
5084 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5085 and "sseregparm" calling convention attributes;
5086 arguments as in struct attribute_spec.handler. */
5089 ix86_handle_cconv_attribute (tree
*node
, tree name
,
5091 int flags ATTRIBUTE_UNUSED
,
5094 if (TREE_CODE (*node
) != FUNCTION_TYPE
5095 && TREE_CODE (*node
) != METHOD_TYPE
5096 && TREE_CODE (*node
) != FIELD_DECL
5097 && TREE_CODE (*node
) != TYPE_DECL
)
5099 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5101 *no_add_attrs
= true;
5105 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5106 if (is_attribute_p ("regparm", name
))
5110 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5112 error ("fastcall and regparm attributes are not compatible");
5115 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5117 error ("regparam and thiscall attributes are not compatible");
5120 cst
= TREE_VALUE (args
);
5121 if (TREE_CODE (cst
) != INTEGER_CST
)
5123 warning (OPT_Wattributes
,
5124 "%qE attribute requires an integer constant argument",
5126 *no_add_attrs
= true;
5128 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
5130 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
5132 *no_add_attrs
= true;
5140 /* Do not warn when emulating the MS ABI. */
5141 if ((TREE_CODE (*node
) != FUNCTION_TYPE
5142 && TREE_CODE (*node
) != METHOD_TYPE
)
5143 || ix86_function_type_abi (*node
) != MS_ABI
)
5144 warning (OPT_Wattributes
, "%qE attribute ignored",
5146 *no_add_attrs
= true;
5150 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5151 if (is_attribute_p ("fastcall", name
))
5153 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5155 error ("fastcall and cdecl attributes are not compatible");
5157 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5159 error ("fastcall and stdcall attributes are not compatible");
5161 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5163 error ("fastcall and regparm attributes are not compatible");
5165 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5167 error ("fastcall and thiscall attributes are not compatible");
5171 /* Can combine stdcall with fastcall (redundant), regparm and
5173 else if (is_attribute_p ("stdcall", name
))
5175 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5177 error ("stdcall and cdecl attributes are not compatible");
5179 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5181 error ("stdcall and fastcall attributes are not compatible");
5183 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5185 error ("stdcall and thiscall attributes are not compatible");
5189 /* Can combine cdecl with regparm and sseregparm. */
5190 else if (is_attribute_p ("cdecl", name
))
5192 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5194 error ("stdcall and cdecl attributes are not compatible");
5196 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5198 error ("fastcall and cdecl attributes are not compatible");
5200 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5202 error ("cdecl and thiscall attributes are not compatible");
5205 else if (is_attribute_p ("thiscall", name
))
5207 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5208 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5210 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5212 error ("stdcall and thiscall attributes are not compatible");
5214 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5216 error ("fastcall and thiscall attributes are not compatible");
5218 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5220 error ("cdecl and thiscall attributes are not compatible");
5224 /* Can combine sseregparm with all attributes. */
5229 /* The transactional memory builtins are implicitly regparm or fastcall
5230 depending on the ABI. Override the generic do-nothing attribute that
5231 these builtins were declared with, and replace it with one of the two
5232 attributes that we expect elsewhere. */
5235 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
5236 tree args ATTRIBUTE_UNUSED
,
5237 int flags ATTRIBUTE_UNUSED
,
5242 /* In no case do we want to add the placeholder attribute. */
5243 *no_add_attrs
= true;
5245 /* The 64-bit ABI is unchanged for transactional memory. */
5249 /* ??? Is there a better way to validate 32-bit windows? We have
5250 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5251 if (CHECK_STACK_LIMIT
> 0)
5252 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
5255 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
5256 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
5258 decl_attributes (node
, alt
, flags
);
5263 /* This function determines from TYPE the calling-convention. */
5266 ix86_get_callcvt (const_tree type
)
5268 unsigned int ret
= 0;
5273 return IX86_CALLCVT_CDECL
;
5275 attrs
= TYPE_ATTRIBUTES (type
);
5276 if (attrs
!= NULL_TREE
)
5278 if (lookup_attribute ("cdecl", attrs
))
5279 ret
|= IX86_CALLCVT_CDECL
;
5280 else if (lookup_attribute ("stdcall", attrs
))
5281 ret
|= IX86_CALLCVT_STDCALL
;
5282 else if (lookup_attribute ("fastcall", attrs
))
5283 ret
|= IX86_CALLCVT_FASTCALL
;
5284 else if (lookup_attribute ("thiscall", attrs
))
5285 ret
|= IX86_CALLCVT_THISCALL
;
5287 /* Regparm isn't allowed for thiscall and fastcall. */
5288 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5290 if (lookup_attribute ("regparm", attrs
))
5291 ret
|= IX86_CALLCVT_REGPARM
;
5292 if (lookup_attribute ("sseregparm", attrs
))
5293 ret
|= IX86_CALLCVT_SSEREGPARM
;
5296 if (IX86_BASE_CALLCVT(ret
) != 0)
5300 is_stdarg
= stdarg_p (type
);
5301 if (TARGET_RTD
&& !is_stdarg
)
5302 return IX86_CALLCVT_STDCALL
| ret
;
5306 || TREE_CODE (type
) != METHOD_TYPE
5307 || ix86_function_type_abi (type
) != MS_ABI
)
5308 return IX86_CALLCVT_CDECL
| ret
;
5310 return IX86_CALLCVT_THISCALL
;
5313 /* Return 0 if the attributes for two types are incompatible, 1 if they
5314 are compatible, and 2 if they are nearly compatible (which causes a
5315 warning to be generated). */
5318 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5320 unsigned int ccvt1
, ccvt2
;
5322 if (TREE_CODE (type1
) != FUNCTION_TYPE
5323 && TREE_CODE (type1
) != METHOD_TYPE
)
5326 ccvt1
= ix86_get_callcvt (type1
);
5327 ccvt2
= ix86_get_callcvt (type2
);
5330 if (ix86_function_regparm (type1
, NULL
)
5331 != ix86_function_regparm (type2
, NULL
))
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
	  for (regno = 0; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
	}
    }

  return regparm;
}
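/* Worked example (added commentary): on a 32-bit SysV target compiled with
   -ffixed-ebx, fixed_regs[BX_REG] is set, so the globals adjustment above
   demotes a local function that could otherwise be promoted to regparm(3)
   down to regparm(2); a nested function is likewise capped because the
   static chain occupies the third candidate register.  */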
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (!nregs)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
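/* Worked example (added commentary): for

     void __attribute__((stdcall)) f (int a, int b);

   SIZE is 8 on ia32 and the callee pops its own arguments ("ret $8"),
   so this hook returns 8.  A variadic stdcall-attributed function falls
   through to caller-pops, which is why stdarg_p is checked above.  */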
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
	return (regno < REGPARM_MAX
		|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
	return (regno < REGPARM_MAX
		|| (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
		|| (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
	return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
	  && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
	return true;
    }

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
/* Return if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}
/* It returns the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by fndecl dependent to the used
   abi format.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
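/* Added commentary: the 32 bytes reserved here for the 64-bit MS ABI are
   the "home area" (shadow space) that the Windows x64 calling convention
   requires callers to allocate for the four register parameters, even
   when the callee takes fewer arguments.  */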
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call abi used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}
static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}

static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (! fndecl)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call abi used.  */
enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
extern void init_regs (void);

/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* 64-bit MS and SYSV ABI have different set of call used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function context
   since this is needed only during RTL expansion.  */
static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT
      && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
5746 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5747 for a call to a function whose data type is FNTYPE.
5748 For a library call, FNTYPE is 0. */
5751 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
5752 tree fntype
, /* tree ptr for function decl */
5753 rtx libname
, /* SYMBOL_REF of library name or 0 */
5757 struct cgraph_local_info
*i
;
5760 memset (cum
, 0, sizeof (*cum
));
5762 /* Initialize for the current callee. */
5765 cfun
->machine
->callee_pass_avx256_p
= false;
5766 cfun
->machine
->callee_return_avx256_p
= false;
5771 i
= cgraph_local_info (fndecl
);
5772 cum
->call_abi
= ix86_function_abi (fndecl
);
5773 fnret_type
= TREE_TYPE (TREE_TYPE (fndecl
));
5778 cum
->call_abi
= ix86_function_type_abi (fntype
);
5780 fnret_type
= TREE_TYPE (fntype
);
5785 if (TARGET_VZEROUPPER
&& fnret_type
)
5787 rtx fnret_value
= ix86_function_value (fnret_type
, fntype
,
5789 if (function_pass_avx256_p (fnret_value
))
5791 /* The return value of this function uses 256bit AVX modes. */
5793 cfun
->machine
->callee_return_avx256_p
= true;
5795 cfun
->machine
->caller_return_avx256_p
= true;
5799 cum
->caller
= caller
;
5801 /* Set up the number of registers to use for passing arguments. */
5803 if (TARGET_64BIT
&& cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
5804 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5805 "or subtarget optimization implying it");
5806 cum
->nregs
= ix86_regparm
;
5809 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
5810 ? X86_64_REGPARM_MAX
5811 : X86_64_MS_REGPARM_MAX
);
5815 cum
->sse_nregs
= SSE_REGPARM_MAX
;
5818 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
5819 ? X86_64_SSE_REGPARM_MAX
5820 : X86_64_MS_SSE_REGPARM_MAX
);
5824 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
5825 cum
->warn_avx
= true;
5826 cum
->warn_sse
= true;
5827 cum
->warn_mmx
= true;
5829 /* Because type might mismatch in between caller and callee, we need to
5830 use actual type of function for local calls.
5831 FIXME: cgraph_analyze can be told to actually record if function uses
5832 va_start so for local functions maybe_vaarg can be made aggressive
5834 FIXME: once typesytem is fixed, we won't need this code anymore. */
5835 if (i
&& i
->local
&& i
->can_change_signature
)
5836 fntype
= TREE_TYPE (fndecl
);
5837 cum
->maybe_vaarg
= (fntype
5838 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
5843 /* If there are variable arguments, then we won't pass anything
5844 in registers in 32-bit mode. */
5845 if (stdarg_p (fntype
))
5856 /* Use ecx and edx registers if function has fastcall attribute,
5857 else look for regparm information. */
5860 unsigned int ccvt
= ix86_get_callcvt (fntype
);
5861 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5864 cum
->fastcall
= 1; /* Same first register as in fastcall. */
5866 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5872 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
5875 /* Set up the number of SSE registers used for passing SFmode
5876 and DFmode arguments. Warn for mismatching ABI. */
5877 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
5881 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5882 But in the case of vector types, it is some vector mode.
5884 When we have only some of our vector isa extensions enabled, then there
5885 are some modes for which vector_mode_supported_p is false. For these
5886 modes, the generic vector support in gcc will choose some non-vector mode
5887 in order to implement the type. By computing the natural mode, we'll
5888 select the proper ABI location for the operand and not depend on whatever
5889 the middle-end decides to do with these vector types.
5891 The midde-end can't deal with the vector types > 16 bytes. In this
5892 case, we return the original mode and warn ABI change if CUM isn't
5895 static enum machine_mode
5896 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
5898 enum machine_mode mode
= TYPE_MODE (type
);
5900 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
5902 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5903 if ((size
== 8 || size
== 16 || size
== 32)
5904 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5905 && TYPE_VECTOR_SUBPARTS (type
) > 1)
5907 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
5909 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
5910 mode
= MIN_MODE_VECTOR_FLOAT
;
5912 mode
= MIN_MODE_VECTOR_INT
;
5914 /* Get the mode which has this inner mode and number of units. */
5915 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
5916 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
5917 && GET_MODE_INNER (mode
) == innermode
)
5919 if (size
== 32 && !TARGET_AVX
)
5921 static bool warnedavx
;
5928 warning (0, "AVX vector argument without AVX "
5929 "enabled changes the ABI");
5931 return TYPE_MODE (type
);
5933 else if ((size
== 8 || size
== 16) && !TARGET_SSE
)
5935 static bool warnedsse
;
5942 warning (0, "SSE vector argument without SSE "
5943 "enabled changes the ABI");
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
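/* Added commentary: in the BLKmode case the generated RTL has the shape

     (parallel [(expr_list (reg:<MODE> <REGNO>) (const_int 0))])

   i.e. a single register piece located at byte offset 0 of the value,
   which is how an aggregate handed back in one register is described.  */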
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
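/* Worked example (added commentary, following the psABI rules above):
   for "union { int i; float f; }" the single eightbyte is classified as
   INTEGERSI via the int member and SSESF via the float member; rule #4
   merges these to INTEGERSI, so the union travels in a general-purpose
   register.  A member of type "long double" would instead force the
   whole object into memory via rule #5.  */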
6028 /* Classify the argument of type TYPE and mode MODE.
6029 CLASSES will be filled by the register class used to pass each word
6030 of the operand. The number of words is returned. In case the parameter
6031 should be passed in memory, 0 is returned. As a special case for zero
6032 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6034 BIT_OFFSET is used internally for handling records and specifies offset
6035 of the offset in bits modulo 256 to avoid overflow cases.
6037 See the x86-64 PS ABI for details.
6041 classify_argument (enum machine_mode mode
, const_tree type
,
6042 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
6044 HOST_WIDE_INT bytes
=
6045 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6047 = (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6049 /* Variable sized entities are always passed/returned in memory. */
6053 if (mode
!= VOIDmode
6054 && targetm
.calls
.must_pass_in_stack (mode
, type
))
6057 if (type
&& AGGREGATE_TYPE_P (type
))
6061 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
6063 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6067 for (i
= 0; i
< words
; i
++)
6068 classes
[i
] = X86_64_NO_CLASS
;
6070 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6071 signalize memory class, so handle it as special case. */
6074 classes
[0] = X86_64_NO_CLASS
;
6078 /* Classify each field of record and merge classes. */
6079 switch (TREE_CODE (type
))
6082 /* And now merge the fields of structure. */
6083 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6085 if (TREE_CODE (field
) == FIELD_DECL
)
6089 if (TREE_TYPE (field
) == error_mark_node
)
6092 /* Bitfields are always classified as integer. Handle them
6093 early, since later code would consider them to be
6094 misaligned integers. */
6095 if (DECL_BIT_FIELD (field
))
6097 for (i
= (int_bit_position (field
)
6098 + (bit_offset
% 64)) / 8 / 8;
6099 i
< ((int_bit_position (field
) + (bit_offset
% 64))
6100 + tree_low_cst (DECL_SIZE (field
), 0)
6103 merge_classes (X86_64_INTEGER_CLASS
,
6110 type
= TREE_TYPE (field
);
6112 /* Flexible array member is ignored. */
6113 if (TYPE_MODE (type
) == BLKmode
6114 && TREE_CODE (type
) == ARRAY_TYPE
6115 && TYPE_SIZE (type
) == NULL_TREE
6116 && TYPE_DOMAIN (type
) != NULL_TREE
6117 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
6122 if (!warned
&& warn_psabi
)
6125 inform (input_location
,
6126 "the ABI of passing struct with"
6127 " a flexible array member has"
6128 " changed in GCC 4.4");
6132 num
= classify_argument (TYPE_MODE (type
), type
,
6134 (int_bit_position (field
)
6135 + bit_offset
) % 256);
6138 pos
= (int_bit_position (field
)
6139 + (bit_offset
% 64)) / 8 / 8;
6140 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
6142 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
6149 /* Arrays are handled as small records. */
6152 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
6153 TREE_TYPE (type
), subclasses
, bit_offset
);
6157 /* The partial classes are now full classes. */
6158 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
6159 subclasses
[0] = X86_64_SSE_CLASS
;
6160 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
6161 && !((bit_offset
% 64) == 0 && bytes
== 4))
6162 subclasses
[0] = X86_64_INTEGER_CLASS
;
6164 for (i
= 0; i
< words
; i
++)
6165 classes
[i
] = subclasses
[i
% num
];
6170 case QUAL_UNION_TYPE
:
6171 /* Unions are similar to RECORD_TYPE but offset is always 0.
6173 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6175 if (TREE_CODE (field
) == FIELD_DECL
)
6179 if (TREE_TYPE (field
) == error_mark_node
)
6182 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6183 TREE_TYPE (field
), subclasses
,
6187 for (i
= 0; i
< num
; i
++)
6188 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6199 /* When size > 16 bytes, if the first one isn't
6200 X86_64_SSE_CLASS or any other ones aren't
6201 X86_64_SSEUP_CLASS, everything should be passed in
6203 if (classes
[0] != X86_64_SSE_CLASS
)
6206 for (i
= 1; i
< words
; i
++)
6207 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6211 /* Final merger cleanup. */
6212 for (i
= 0; i
< words
; i
++)
6214 /* If one class is MEMORY, everything should be passed in
6216 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6219 /* The X86_64_SSEUP_CLASS should be always preceded by
6220 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6221 if (classes
[i
] == X86_64_SSEUP_CLASS
6222 && classes
[i
- 1] != X86_64_SSE_CLASS
6223 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6225 /* The first one should never be X86_64_SSEUP_CLASS. */
6226 gcc_assert (i
!= 0);
6227 classes
[i
] = X86_64_SSE_CLASS
;
6230 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6231 everything should be passed in memory. */
6232 if (classes
[i
] == X86_64_X87UP_CLASS
6233 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6237 /* The first one should never be X86_64_X87UP_CLASS. */
6238 gcc_assert (i
!= 0);
6239 if (!warned
&& warn_psabi
)
6242 inform (input_location
,
6243 "the ABI of passing union with long double"
6244 " has changed in GCC 4.4");
6252 /* Compute alignment needed. We align all types to natural boundaries with
6253 exception of XFmode that is aligned to 64bits. */
6254 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6256 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6259 mode_alignment
= 128;
6260 else if (mode
== XCmode
)
6261 mode_alignment
= 256;
6262 if (COMPLEX_MODE_P (mode
))
6263 mode_alignment
/= 2;
6264 /* Misaligned fields are always returned in memory. */
6265 if (bit_offset
% mode_alignment
)
6269 /* for V1xx modes, just use the base mode */
6270 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6271 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6272 mode
= GET_MODE_INNER (mode
);
6274 /* Classification of atomic types. */
6279 classes
[0] = X86_64_SSE_CLASS
;
6282 classes
[0] = X86_64_SSE_CLASS
;
6283 classes
[1] = X86_64_SSEUP_CLASS
;
6293 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6297 classes
[0] = X86_64_INTEGERSI_CLASS
;
6300 else if (size
<= 64)
6302 classes
[0] = X86_64_INTEGER_CLASS
;
6305 else if (size
<= 64+32)
6307 classes
[0] = X86_64_INTEGER_CLASS
;
6308 classes
[1] = X86_64_INTEGERSI_CLASS
;
6311 else if (size
<= 64+64)
6313 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6321 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6325 /* OImode shouldn't be used directly. */
6330 if (!(bit_offset
% 64))
6331 classes
[0] = X86_64_SSESF_CLASS
;
6333 classes
[0] = X86_64_SSE_CLASS
;
6336 classes
[0] = X86_64_SSEDF_CLASS
;
6339 classes
[0] = X86_64_X87_CLASS
;
6340 classes
[1] = X86_64_X87UP_CLASS
;
6343 classes
[0] = X86_64_SSE_CLASS
;
6344 classes
[1] = X86_64_SSEUP_CLASS
;
6347 classes
[0] = X86_64_SSE_CLASS
;
6348 if (!(bit_offset
% 64))
6354 if (!warned
&& warn_psabi
)
6357 inform (input_location
,
6358 "the ABI of passing structure with complex float"
6359 " member has changed in GCC 4.4");
6361 classes
[1] = X86_64_SSESF_CLASS
;
6365 classes
[0] = X86_64_SSEDF_CLASS
;
6366 classes
[1] = X86_64_SSEDF_CLASS
;
6369 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6372 /* This modes is larger than 16 bytes. */
6380 classes
[0] = X86_64_SSE_CLASS
;
6381 classes
[1] = X86_64_SSEUP_CLASS
;
6382 classes
[2] = X86_64_SSEUP_CLASS
;
6383 classes
[3] = X86_64_SSEUP_CLASS
;
6391 classes
[0] = X86_64_SSE_CLASS
;
6392 classes
[1] = X86_64_SSEUP_CLASS
;
6400 classes
[0] = X86_64_SSE_CLASS
;
6406 gcc_assert (VECTOR_MODE_P (mode
));
6411 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6413 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6414 classes
[0] = X86_64_INTEGERSI_CLASS
;
6416 classes
[0] = X86_64_INTEGER_CLASS
;
6417 classes
[1] = X86_64_INTEGER_CLASS
;
6418 return 1 + (bytes
> 8);
/* Examine the argument and return set number of register required in each
   class.  Return 0 iff parameter should be passed in memory.  */

static int
examine_argument (enum machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}
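/* Worked example (added commentary): for "struct { double x, y; }" the
   classifier yields SSEDF, SSEDF, so this function reports 0 integer and
   2 SSE registers; the struct is passed in two SSE registers when enough
   remain, otherwise on the stack.  */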
6463 /* Construct container for the argument used by GCC interface. See
6464 FUNCTION_ARG for the detailed description. */
6467 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6468 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6469 const int *intreg
, int sse_regno
)
6471 /* The following variables hold the static issued_error state. */
6472 static bool issued_sse_arg_error
;
6473 static bool issued_sse_ret_error
;
6474 static bool issued_x87_ret_error
;
6476 enum machine_mode tmpmode
;
6478 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6479 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6483 int needed_sseregs
, needed_intregs
;
6484 rtx exp
[MAX_CLASSES
];
6487 n
= classify_argument (mode
, type
, regclass
, 0);
6490 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6493 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6496 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6497 some less clueful developer tries to use floating-point anyway. */
6498 if (needed_sseregs
&& !TARGET_SSE
)
6502 if (!issued_sse_ret_error
)
6504 error ("SSE register return with SSE disabled");
6505 issued_sse_ret_error
= true;
6508 else if (!issued_sse_arg_error
)
6510 error ("SSE register argument with SSE disabled");
6511 issued_sse_arg_error
= true;
6516 /* Likewise, error if the ABI requires us to return values in the
6517 x87 registers and the user specified -mno-80387. */
6518 if (!TARGET_80387
&& in_return
)
6519 for (i
= 0; i
< n
; i
++)
6520 if (regclass
[i
] == X86_64_X87_CLASS
6521 || regclass
[i
] == X86_64_X87UP_CLASS
6522 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6524 if (!issued_x87_ret_error
)
6526 error ("x87 register return with x87 disabled");
6527 issued_x87_ret_error
= true;
6532 /* First construct simple cases. Avoid SCmode, since we want to use
6533 single register to pass this type. */
6534 if (n
== 1 && mode
!= SCmode
)
6535 switch (regclass
[0])
6537 case X86_64_INTEGER_CLASS
:
6538 case X86_64_INTEGERSI_CLASS
:
6539 return gen_rtx_REG (mode
, intreg
[0]);
6540 case X86_64_SSE_CLASS
:
6541 case X86_64_SSESF_CLASS
:
6542 case X86_64_SSEDF_CLASS
:
6543 if (mode
!= BLKmode
)
6544 return gen_reg_or_parallel (mode
, orig_mode
,
6545 SSE_REGNO (sse_regno
));
6547 case X86_64_X87_CLASS
:
6548 case X86_64_COMPLEX_X87_CLASS
:
6549 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6550 case X86_64_NO_CLASS
:
6551 /* Zero sized array, struct or class. */
6557 && regclass
[0] == X86_64_SSE_CLASS
6558 && regclass
[1] == X86_64_SSEUP_CLASS
6560 return gen_reg_or_parallel (mode
, orig_mode
,
6561 SSE_REGNO (sse_regno
));
6563 && regclass
[0] == X86_64_SSE_CLASS
6564 && regclass
[1] == X86_64_SSEUP_CLASS
6565 && regclass
[2] == X86_64_SSEUP_CLASS
6566 && regclass
[3] == X86_64_SSEUP_CLASS
6568 return gen_reg_or_parallel (mode
, orig_mode
,
6569 SSE_REGNO (sse_regno
));
6571 && regclass
[0] == X86_64_X87_CLASS
6572 && regclass
[1] == X86_64_X87UP_CLASS
)
6573 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6576 && regclass
[0] == X86_64_INTEGER_CLASS
6577 && regclass
[1] == X86_64_INTEGER_CLASS
6578 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6579 && intreg
[0] + 1 == intreg
[1])
6580 return gen_rtx_REG (mode
, intreg
[0]);
6582 /* Otherwise figure out the entries of the PARALLEL. */
6583 for (i
= 0; i
< n
; i
++)
6587 switch (regclass
[i
])
6589 case X86_64_NO_CLASS
:
6591 case X86_64_INTEGER_CLASS
:
6592 case X86_64_INTEGERSI_CLASS
:
6593 /* Merge TImodes on aligned occasions here too. */
6594 if (i
* 8 + 8 > bytes
)
6596 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6597 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6601 /* We've requested 24 bytes we
6602 don't have mode for. Use DImode. */
6603 if (tmpmode
== BLKmode
)
6606 = gen_rtx_EXPR_LIST (VOIDmode
,
6607 gen_rtx_REG (tmpmode
, *intreg
),
6611 case X86_64_SSESF_CLASS
:
6613 = gen_rtx_EXPR_LIST (VOIDmode
,
6614 gen_rtx_REG (SFmode
,
6615 SSE_REGNO (sse_regno
)),
6619 case X86_64_SSEDF_CLASS
:
6621 = gen_rtx_EXPR_LIST (VOIDmode
,
6622 gen_rtx_REG (DFmode
,
6623 SSE_REGNO (sse_regno
)),
6627 case X86_64_SSE_CLASS
:
6635 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6645 && regclass
[1] == X86_64_SSEUP_CLASS
6646 && regclass
[2] == X86_64_SSEUP_CLASS
6647 && regclass
[3] == X86_64_SSEUP_CLASS
);
6655 = gen_rtx_EXPR_LIST (VOIDmode
,
6656 gen_rtx_REG (tmpmode
,
6657 SSE_REGNO (sse_regno
)),
6666 /* Empty aligned struct, union or class. */
6670 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6671 for (i
= 0; i
< nexps
; i
++)
6672 XVECEXP (ret
, 0, i
) = exp
[i
];
6676 /* Update the data in CUM to advance over an argument of mode MODE
6677 and data type TYPE. (TYPE is null for libcalls where that information
6678 may not be available.) */
6681 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6682 const_tree type
, HOST_WIDE_INT bytes
,
6683 HOST_WIDE_INT words
)
6699 cum
->words
+= words
;
6700 cum
->nregs
-= words
;
6701 cum
->regno
+= words
;
6703 if (cum
->nregs
<= 0)
6711 /* OImode shouldn't be used directly. */
6715 if (cum
->float_in_sse
< 2)
6718 if (cum
->float_in_sse
< 1)
6735 if (!type
|| !AGGREGATE_TYPE_P (type
))
6737 cum
->sse_words
+= words
;
6738 cum
->sse_nregs
-= 1;
6739 cum
->sse_regno
+= 1;
6740 if (cum
->sse_nregs
<= 0)
6754 if (!type
|| !AGGREGATE_TYPE_P (type
))
6756 cum
->mmx_words
+= words
;
6757 cum
->mmx_nregs
-= 1;
6758 cum
->mmx_regno
+= 1;
6759 if (cum
->mmx_nregs
<= 0)
6770 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6771 const_tree type
, HOST_WIDE_INT words
, bool named
)
6773 int int_nregs
, sse_nregs
;
6775 /* Unnamed 256bit vector mode parameters are passed on stack. */
6776 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6779 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6780 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6782 cum
->nregs
-= int_nregs
;
6783 cum
->sse_nregs
-= sse_nregs
;
6784 cum
->regno
+= int_nregs
;
6785 cum
->sse_regno
+= sse_nregs
;
6789 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6790 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6791 cum
->words
+= words
;
static void
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
    }
}
/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
			   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    function_arg_advance_ms_64 (cum, bytes, words);
  else if (TARGET_64BIT)
    function_arg_advance_64 (cum, mode, type, words, named);
  else
    function_arg_advance_32 (cum, mode, type, bytes, words);
}
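/* Worked example (added commentary): advancing over the arguments of
   "void f (int a, double b)" on 64-bit SysV consumes one integer register
   (for A) and one SSE register (for B), reducing cum->nregs and
   cum->sse_nregs by one each; on 32-bit both arguments normally go on the
   stack unless a regparm/sseregparm convention applies.  */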
6838 /* Define where to put the arguments to a function.
6839 Value is zero to push the argument on the stack,
6840 or a hard register in which to store the argument.
6842 MODE is the argument's machine mode.
6843 TYPE is the data type of the argument (as a tree).
6844 This is null for libcalls where that information may
6846 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6847 the preceding args and about the function being called.
6848 NAMED is nonzero if this argument is a named parameter
6849 (otherwise it is an extra parameter matching an ellipsis). */
6852 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6853 enum machine_mode orig_mode
, const_tree type
,
6854 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
6856 static bool warnedsse
, warnedmmx
;
6858 /* Avoid the AL settings for the Unix64 ABI. */
6859 if (mode
== VOIDmode
)
6875 if (words
<= cum
->nregs
)
6877 int regno
= cum
->regno
;
6879 /* Fastcall allocates the first two DWORD (SImode) or
6880 smaller arguments to ECX and EDX if it isn't an
6886 || (type
&& AGGREGATE_TYPE_P (type
)))
6889 /* ECX not EAX is the first allocated register. */
6890 if (regno
== AX_REG
)
6893 return gen_rtx_REG (mode
, regno
);
6898 if (cum
->float_in_sse
< 2)
6901 if (cum
->float_in_sse
< 1)
6905 /* In 32bit, we pass TImode in xmm registers. */
6912 if (!type
|| !AGGREGATE_TYPE_P (type
))
6914 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
6917 warning (0, "SSE vector argument without SSE enabled "
6921 return gen_reg_or_parallel (mode
, orig_mode
,
6922 cum
->sse_regno
+ FIRST_SSE_REG
);
6927 /* OImode shouldn't be used directly. */
6936 if (!type
|| !AGGREGATE_TYPE_P (type
))
6939 return gen_reg_or_parallel (mode
, orig_mode
,
6940 cum
->sse_regno
+ FIRST_SSE_REG
);
6950 if (!type
|| !AGGREGATE_TYPE_P (type
))
6952 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
6955 warning (0, "MMX vector argument without MMX enabled "
6959 return gen_reg_or_parallel (mode
, orig_mode
,
6960 cum
->mmx_regno
+ FIRST_MMX_REG
);
6969 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6970 enum machine_mode orig_mode
, const_tree type
, bool named
)
6972 /* Handle a hidden AL argument containing number of registers
6973 for varargs x86-64 functions. */
6974 if (mode
== VOIDmode
)
6975 return GEN_INT (cum
->maybe_vaarg
6976 ? (cum
->sse_nregs
< 0
6977 ? X86_64_SSE_REGPARM_MAX
6992 /* Unnamed 256bit vector mode parameters are passed on stack. */
6998 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
7000 &x86_64_int_parameter_registers
[cum
->regno
],
7005 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7006 enum machine_mode orig_mode
, bool named
,
7007 HOST_WIDE_INT bytes
)
7011 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
7012 We use value of -2 to specify that current function call is MSABI. */
7013 if (mode
== VOIDmode
)
7014 return GEN_INT (-2);
7016 /* If we've run out of registers, it goes on the stack. */
7017 if (cum
->nregs
== 0)
7020 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
7022 /* Only floating point modes are passed in anything but integer regs. */
7023 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
7026 regno
= cum
->regno
+ FIRST_SSE_REG
;
7031 /* Unnamed floating parameters are passed in both the
7032 SSE and integer registers. */
7033 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
7034 t2
= gen_rtx_REG (mode
, regno
);
7035 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
7036 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
7037 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
7040 /* Handle aggregated types passed in register. */
7041 if (orig_mode
== BLKmode
)
7043 if (bytes
> 0 && bytes
<= 8)
7044 mode
= (bytes
> 4 ? DImode
: SImode
);
7045 if (mode
== BLKmode
)
7049 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
7052 /* Return where to put the arguments to a function.
7053 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7055 MODE is the argument's machine mode. TYPE is the data type of the
7056 argument. It is null for libcalls where that information may not be
7057 available. CUM gives information about the preceding args and about
7058 the function being called. NAMED is nonzero if this argument is a
7059 named parameter (otherwise it is an extra parameter matching an
7063 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
7064 const_tree type
, bool named
)
7066 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7067 enum machine_mode mode
= omode
;
7068 HOST_WIDE_INT bytes
, words
;
7071 if (mode
== BLKmode
)
7072 bytes
= int_size_in_bytes (type
);
7074 bytes
= GET_MODE_SIZE (mode
);
7075 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7077 /* To simplify the code below, represent vector types with a vector mode
7078 even if MMX/SSE are not active. */
7079 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
7080 mode
= type_natural_mode (type
, cum
);
7082 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7083 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
7084 else if (TARGET_64BIT
)
7085 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
7087 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
7089 if (TARGET_VZEROUPPER
&& function_pass_avx256_p (arg
))
7091 /* This argument uses 256bit AVX modes. */
7093 cfun
->machine
->callee_pass_avx256_p
= true;
7095 cfun
->machine
->caller_pass_avx256_p
= true;
7101 /* A C expression that indicates when an argument must be passed by
7102 reference. If nonzero for an argument, a copy of that argument is
7103 made in memory and a pointer to the argument is passed instead of
7104 the argument itself. The pointer is passed in whatever way is
7105 appropriate for passing a pointer to that type. */
7108 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED
,
7109 enum machine_mode mode ATTRIBUTE_UNUSED
,
7110 const_tree type
, bool named ATTRIBUTE_UNUSED
)
7112 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7114 /* See Windows x64 Software Convention. */
7115 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7117 int msize
= (int) GET_MODE_SIZE (mode
);
7120 /* Arrays are passed by reference. */
7121 if (TREE_CODE (type
) == ARRAY_TYPE
)
7124 if (AGGREGATE_TYPE_P (type
))
7126 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7127 are passed by reference. */
7128 msize
= int_size_in_bytes (type
);
7132 /* __m128 is passed by reference. */
7134 case 1: case 2: case 4: case 8:
7140 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
7146 /* Return true when TYPE should be 128bit aligned for 32bit argument
7147 passing ABI. XXX: This function is obsolete and is only used for
7148 checking psABI compatibility with previous versions of GCC. */
7151 ix86_compat_aligned_value_p (const_tree type
)
7153 enum machine_mode mode
= TYPE_MODE (type
);
7154 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
7158 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
7160 if (TYPE_ALIGN (type
) < 128)
7163 if (AGGREGATE_TYPE_P (type
))
7165 /* Walk the aggregates recursively. */
7166 switch (TREE_CODE (type
))
7170 case QUAL_UNION_TYPE
:
7174 /* Walk all the structure fields. */
7175 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7177 if (TREE_CODE (field
) == FIELD_DECL
7178 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7185 /* Just for use if some languages passes arrays by value. */
7186 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7197 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7198 XXX: This function is obsolete and is only used for checking psABI
7199 compatibility with previous versions of GCC. */
7202 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7203 const_tree type
, unsigned int align
)
7205 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7206 natural boundaries. */
7207 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7209 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7210 make an exception for SSE modes since these require 128bit
7213 The handling here differs from field_alignment. ICC aligns MMX
7214 arguments to 4 byte boundaries, while structure fields are aligned
7215 to 8 byte boundaries. */
7218 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7219 align
= PARM_BOUNDARY
;
7223 if (!ix86_compat_aligned_value_p (type
))
7224 align
= PARM_BOUNDARY
;
7227 if (align
> BIGGEST_ALIGNMENT
)
7228 align
= BIGGEST_ALIGNMENT
;
7232 /* Return true when TYPE should be 128bit aligned for 32bit argument
7236 ix86_contains_aligned_value_p (const_tree type
)
7238 enum machine_mode mode
= TYPE_MODE (type
);
7240 if (mode
== XFmode
|| mode
== XCmode
)
7243 if (TYPE_ALIGN (type
) < 128)
7246 if (AGGREGATE_TYPE_P (type
))
7248 /* Walk the aggregates recursively. */
7249 switch (TREE_CODE (type
))
7253 case QUAL_UNION_TYPE
:
7257 /* Walk all the structure fields. */
7258 for (field
= TYPE_FIELDS (type
);
7260 field
= DECL_CHAIN (field
))
7262 if (TREE_CODE (field
) == FIELD_DECL
7263 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7270 /* Just for use if some languages passes arrays by value. */
7271 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7280 return TYPE_ALIGN (type
) >= 128;
7285 /* Gives the alignment boundary, in bits, of an argument with the
7286 specified mode and type. */
7289 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7294 /* Since the main variant type is used for call, we convert it to
7295 the main variant type. */
7296 type
= TYPE_MAIN_VARIANT (type
);
7297 align
= TYPE_ALIGN (type
);
7300 align
= GET_MODE_ALIGNMENT (mode
);
7301 if (align
< PARM_BOUNDARY
)
7302 align
= PARM_BOUNDARY
;
7306 unsigned int saved_align
= align
;
7310 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7313 if (mode
== XFmode
|| mode
== XCmode
)
7314 align
= PARM_BOUNDARY
;
7316 else if (!ix86_contains_aligned_value_p (type
))
7317 align
= PARM_BOUNDARY
;
7320 align
= PARM_BOUNDARY
;
7325 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7329 inform (input_location
,
7330 "The ABI for passing parameters with %d-byte"
7331 " alignment has changed in GCC 4.6",
7332 align
/ BITS_PER_UNIT
);
7339 /* Return true if N is a possible register number of function value. */
7342 ix86_function_value_regno_p (const unsigned int regno
)
7349 case FIRST_FLOAT_REG
:
7350 /* TODO: The function should depend on current function ABI but
7351 builtins.c would need updating then. Therefore we use the
7353 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7355 return TARGET_FLOAT_RETURNS_IN_80387
;
7361 if (TARGET_MACHO
|| TARGET_64BIT
)
7369 /* Define how to find the value returned by a function.
7370 VALTYPE is the data type of the value (as a tree).
7371 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7372 otherwise, FUNC is 0. */
7375 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7376 const_tree fntype
, const_tree fn
)
7380 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7381 we normally prevent this case when mmx is not available. However
7382 some ABIs may require the result to be returned like DImode. */
7383 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7384 regno
= FIRST_MMX_REG
;
7386 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7387 we prevent this case when sse is not available. However some ABIs
7388 may require the result to be returned like integer TImode. */
7389 else if (mode
== TImode
7390 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7391 regno
= FIRST_SSE_REG
;
7393 /* 32-byte vector modes in %ymm0. */
7394 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7395 regno
= FIRST_SSE_REG
;
7397 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7398 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7399 regno
= FIRST_FLOAT_REG
;
7401 /* Most things go in %eax. */
7404 /* Override FP return register with %xmm0 for local functions when
7405 SSE math is enabled or for functions with sseregparm attribute. */
7406 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7408 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7409 if ((sse_level
>= 1 && mode
== SFmode
)
7410 || (sse_level
== 2 && mode
== DFmode
))
7411 regno
= FIRST_SSE_REG
;
7414 /* OImode shouldn't be used directly. */
7415 gcc_assert (mode
!= OImode
);
7417 return gen_rtx_REG (orig_mode
, regno
);
7421 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7426 /* Handle libcalls, which don't provide a type node. */
7427 if (valtype
== NULL
)
7441 regno
= FIRST_SSE_REG
;
7445 regno
= FIRST_FLOAT_REG
;
7453 return gen_rtx_REG (mode
, regno
);
7455 else if (POINTER_TYPE_P (valtype
))
7457 /* Pointers are always returned in word_mode. */
7461 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7462 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7463 x86_64_int_return_registers
, 0);
7465 /* For zero sized structures, construct_container returns NULL, but we
7466 need to keep rest of compiler happy by returning meaningful value. */
7468 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7474 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
7476 unsigned int regno
= AX_REG
;
7480 switch (GET_MODE_SIZE (mode
))
7483 if((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7484 && !COMPLEX_MODE_P (mode
))
7485 regno
= FIRST_SSE_REG
;
7489 if (mode
== SFmode
|| mode
== DFmode
)
7490 regno
= FIRST_SSE_REG
;
7496 return gen_rtx_REG (orig_mode
, regno
);
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
		       enum machine_mode orig_mode, enum machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}

static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* Pointer function arguments and return values are promoted to
   word_mode.  */

static enum machine_mode
ix86_promote_function_mode (const_tree type, enum machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (type != NULL_TREE && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}

static rtx
ix86_libcall_value (enum machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
7552 /* Return true iff type is returned in memory. */
7554 static bool ATTRIBUTE_UNUSED
7555 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7559 if (mode
== BLKmode
)
7562 size
= int_size_in_bytes (type
);
7564 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7567 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7569 /* User-created vectors small enough to fit in EAX. */
7573 /* MMX/3dNow values are returned in MM0,
7574 except when it doesn't exits or the ABI prescribes otherwise. */
7576 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7578 /* SSE values are returned in XMM0, except when it doesn't exist. */
7582 /* AVX values are returned in YMM0, except when it doesn't exist. */
7593 /* OImode shouldn't be used directly. */
7594 gcc_assert (mode
!= OImode
);
static bool ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
  int needed_intregs, needed_sseregs;
  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}

static bool ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size = int_size_in_bytes (type);

  /* __m128 is returned in xmm0.  */
  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
      && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
    return false;

  /* Otherwise, the size must be exactly in [1248].  */
  return size != 1 && size != 2 && size != 4 && size != 8;
}
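/* Worked example (added commentary): under the 64-bit MS ABI an 8-byte
   "struct { int a, b; }" comes back in %rax (its size is in {1,2,4,8}),
   while a 12-byte struct fails the size test above and is returned through
   a hidden memory pointer; a 16-byte __m128 value is the explicit
   exception and is returned in %xmm0.  */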
7621 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7623 #ifdef SUBTARGET_RETURN_IN_MEMORY
7624 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7626 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7630 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7631 return return_in_memory_ms_64 (type
, mode
);
7633 return return_in_memory_64 (type
, mode
);
7636 return return_in_memory_32 (type
, mode
);
7640 /* When returning SSE vector types, we have a choice of either
7641 (1) being abi incompatible with a -march switch, or
7642 (2) generating an error.
7643 Given no good solution, I think the safest thing is one warning.
7644 The user won't be able to use -Werror, but....
7646 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7647 called in response to actually generating a caller or callee that
7648 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7649 via aggregate_value_p for general type probing from tree-ssa. */
7652 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7654 static bool warnedsse
, warnedmmx
;
7656 if (!TARGET_64BIT
&& type
)
7658 /* Look at the return type of the function, not the function type. */
7659 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7661 if (!TARGET_SSE
&& !warnedsse
)
7664 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7667 warning (0, "SSE vector return without SSE enabled "
7672 if (!TARGET_MMX
&& !warnedmmx
)
7674 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7677 warning (0, "MMX vector return without MMX enabled "
7687 /* Create the va_list data type. */
7689 /* Returns the calling convention specific va_list date type.
7690 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7693 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7695 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7697 /* For i386 we use plain pointer to argument area. */
7698 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7699 return build_pointer_type (char_type_node
);
7701 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7702 type_decl
= build_decl (BUILTINS_LOCATION
,
7703 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7705 f_gpr
= build_decl (BUILTINS_LOCATION
,
7706 FIELD_DECL
, get_identifier ("gp_offset"),
7707 unsigned_type_node
);
7708 f_fpr
= build_decl (BUILTINS_LOCATION
,
7709 FIELD_DECL
, get_identifier ("fp_offset"),
7710 unsigned_type_node
);
7711 f_ovf
= build_decl (BUILTINS_LOCATION
,
7712 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7714 f_sav
= build_decl (BUILTINS_LOCATION
,
7715 FIELD_DECL
, get_identifier ("reg_save_area"),
7718 va_list_gpr_counter_field
= f_gpr
;
7719 va_list_fpr_counter_field
= f_fpr
;
7721 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7722 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7723 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7724 DECL_FIELD_CONTEXT (f_sav
) = record
;
7726 TYPE_STUB_DECL (record
) = type_decl
;
7727 TYPE_NAME (record
) = type_decl
;
7728 TYPE_FIELDS (record
) = f_gpr
;
7729 DECL_CHAIN (f_gpr
) = f_fpr
;
7730 DECL_CHAIN (f_fpr
) = f_ovf
;
7731 DECL_CHAIN (f_ovf
) = f_sav
;
7733 layout_type (record
);
7735 /* The correct type is an array type of one element. */
7736 return build_array_type (record
, build_index_type (size_zero_node
));
7739 /* Setup the builtin va_list data type and for 64-bit the additional
7740 calling convention specific va_list data types. */
7743 ix86_build_builtin_va_list (void)
7745 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7747 /* Initialize abi specific va_list builtin types. */
7751 if (ix86_abi
== MS_ABI
)
7753 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7754 if (TREE_CODE (t
) != RECORD_TYPE
)
7755 t
= build_variant_type_copy (t
);
7756 sysv_va_list_type_node
= t
;
7761 if (TREE_CODE (t
) != RECORD_TYPE
)
7762 t
= build_variant_type_copy (t
);
7763 sysv_va_list_type_node
= t
;
7765 if (ix86_abi
!= MS_ABI
)
7767 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7768 if (TREE_CODE (t
) != RECORD_TYPE
)
7769 t
= build_variant_type_copy (t
);
7770 ms_va_list_type_node
= t
;
7775 if (TREE_CODE (t
) != RECORD_TYPE
)
7776 t
= build_variant_type_copy (t
);
7777 ms_va_list_type_node
= t
;
7784 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7787 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7793 /* GPR size of varargs save area. */
7794 if (cfun
->va_list_gpr_size
)
7795 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7797 ix86_varargs_gpr_size
= 0;
7799 /* FPR size of varargs save area. We don't need it if we don't pass
7800 anything in SSE registers. */
7801 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7802 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7804 ix86_varargs_fpr_size
= 0;
7806 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
7809 save_area
= frame_pointer_rtx
;
7810 set
= get_varargs_alias_set ();
7812 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
7813 if (max
> X86_64_REGPARM_MAX
)
7814 max
= X86_64_REGPARM_MAX
;
7816 for (i
= cum
->regno
; i
< max
; i
++)
7818 mem
= gen_rtx_MEM (word_mode
,
7819 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
7820 MEM_NOTRAP_P (mem
) = 1;
7821 set_mem_alias_set (mem
, set
);
7822 emit_move_insn (mem
,
7823 gen_rtx_REG (word_mode
,
7824 x86_64_int_parameter_registers
[i
]));
7827 if (ix86_varargs_fpr_size
)
7829 enum machine_mode smode
;
7832 /* Now emit code to save SSE registers. The AX parameter contains number
7833 of SSE parameter registers used to call this function, though all we
7834 actually check here is the zero/non-zero status. */
7836 label
= gen_label_rtx ();
7837 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
7838 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
7841 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7842 we used movdqa (i.e. TImode) instead? Perhaps even better would
7843 be if we could determine the real mode of the data, via a hook
7844 into pass_stdarg. Ignore all that for now. */
7846 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
7847 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
7849 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
7850 if (max
> X86_64_SSE_REGPARM_MAX
)
7851 max
= X86_64_SSE_REGPARM_MAX
;
7853 for (i
= cum
->sse_regno
; i
< max
; ++i
)
7855 mem
= plus_constant (Pmode
, save_area
,
7856 i
* 16 + ix86_varargs_gpr_size
);
7857 mem
= gen_rtx_MEM (smode
, mem
);
7858 MEM_NOTRAP_P (mem
) = 1;
7859 set_mem_alias_set (mem
, set
);
7860 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
7862 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
7870 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
7872 alias_set_type set
= get_varargs_alias_set ();
7875 /* Reset to zero, as there might be a sysv vaarg used
7877 ix86_varargs_gpr_size
= 0;
7878 ix86_varargs_fpr_size
= 0;
7880 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
7884 mem
= gen_rtx_MEM (Pmode
,
7885 plus_constant (Pmode
, virtual_incoming_args_rtx
,
7886 i
* UNITS_PER_WORD
));
7887 MEM_NOTRAP_P (mem
) = 1;
7888 set_mem_alias_set (mem
, set
);
7890 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
7891 emit_move_insn (mem
, reg
);
static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
			       true);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg, seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = get_insns ();
	  end_sequence ();

	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  rtx va_r, next;

	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);
	}
      return;
    }
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 The prologue of the function saves it right above the stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
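/* For reference, the assignments above fill in the standard SysV x86-64
   va_list record (a sketch; the real FIELD_DECLs are built in
   ix86_build_builtin_va_list_abi):

       typedef struct {
	 unsigned int gp_offset;	/- n_gpr * 8, offset into reg_save_area
	 unsigned int fp_offset;	/- 8 * X86_64_REGPARM_MAX + n_fpr * 16
	 void *overflow_arg_area;	/- first stack-passed argument
	 void *reg_save_area;		/- base of the register save area
       } __va_list_tag;  */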
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  int indirect_p = 0;
  enum machine_mode nat_mode;
  unsigned int arg_boundary;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
  valist = build_va_arg_indirect_ref (valist);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type, NULL);
8119 /* Unnamed 256bit vector mode parameters are passed on stack. */
8120 if (!TARGET_64BIT_MS_ABI
)
8127 container
= construct_container (nat_mode
, TYPE_MODE (type
),
8128 type
, 0, X86_64_REGPARM_MAX
,
8129 X86_64_SSE_REGPARM_MAX
, intreg
,
8134 /* Pull the value out of the saved registers. */
8136 addr
= create_tmp_var (ptr_type_node
, "addr");
8140 int needed_intregs
, needed_sseregs
;
8142 tree int_addr
, sse_addr
;
8144 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
8145 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
8147 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
8149 need_temp
= (!REG_P (container
)
8150 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
8151 || TYPE_ALIGN (type
) > 128));
8153 /* In case we are passing structure, verify that it is consecutive block
8154 on the register save area. If not we need to do moves. */
8155 if (!need_temp
&& !REG_P (container
))
8157 /* Verify that all registers are strictly consecutive */
8158 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
8162 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8164 rtx slot
= XVECEXP (container
, 0, i
);
8165 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
8166 || INTVAL (XEXP (slot
, 1)) != i
* 16)
8174 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8176 rtx slot
= XVECEXP (container
, 0, i
);
8177 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
8178 || INTVAL (XEXP (slot
, 1)) != i
* 8)
8190 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
8191 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
8194 /* First ensure that we fit completely in registers. */
8197 t
= build_int_cst (TREE_TYPE (gpr
),
8198 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
8199 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
8200 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8201 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8202 gimplify_and_add (t
, pre_p
);
8206 t
= build_int_cst (TREE_TYPE (fpr
),
8207 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
8208 + X86_64_REGPARM_MAX
* 8);
8209 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
8210 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8211 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8212 gimplify_and_add (t
, pre_p
);
8215 /* Compute index to start of area used for integer regs. */
8218 /* int_addr = gpr + sav; */
8219 t
= fold_build_pointer_plus (sav
, gpr
);
8220 gimplify_assign (int_addr
, t
, pre_p
);
8224 /* sse_addr = fpr + sav; */
8225 t
= fold_build_pointer_plus (sav
, fpr
);
8226 gimplify_assign (sse_addr
, t
, pre_p
);
8230 int i
, prev_size
= 0;
8231 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8234 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8235 gimplify_assign (addr
, t
, pre_p
);
8237 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8239 rtx slot
= XVECEXP (container
, 0, i
);
8240 rtx reg
= XEXP (slot
, 0);
8241 enum machine_mode mode
= GET_MODE (reg
);
8247 tree dest_addr
, dest
;
8248 int cur_size
= GET_MODE_SIZE (mode
);
8250 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8251 prev_size
= INTVAL (XEXP (slot
, 1));
8252 if (prev_size
+ cur_size
> size
)
8254 cur_size
= size
- prev_size
;
8255 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8256 if (mode
== BLKmode
)
8259 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8260 if (mode
== GET_MODE (reg
))
8261 addr_type
= build_pointer_type (piece_type
);
8263 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8265 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8268 if (SSE_REGNO_P (REGNO (reg
)))
8270 src_addr
= sse_addr
;
8271 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8275 src_addr
= int_addr
;
8276 src_offset
= REGNO (reg
) * 8;
8278 src_addr
= fold_convert (addr_type
, src_addr
);
8279 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8281 dest_addr
= fold_convert (daddr_type
, addr
);
8282 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8283 if (cur_size
== GET_MODE_SIZE (mode
))
8285 src
= build_va_arg_indirect_ref (src_addr
);
8286 dest
= build_va_arg_indirect_ref (dest_addr
);
8288 gimplify_assign (dest
, src
, pre_p
);
8293 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8294 3, dest_addr
, src_addr
,
8295 size_int (cur_size
));
8296 gimplify_and_add (copy
, pre_p
);
8298 prev_size
+= cur_size
;
8304 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8305 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8306 gimplify_assign (gpr
, t
, pre_p
);
8311 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8312 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8313 gimplify_assign (fpr
, t
, pre_p
);
8316 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8318 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
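/* The five strings above are, in order, log10(2), ln(2), log2(e), log2(10)
   and pi -- exactly the values that the x87 fldlg2, fldln2, fldl2e, fldl2t
   and fldpi instructions load, which is why they can be materialized
   without a memory operand.  */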
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);
  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
8449 /* Return the opcode of the special instruction to be used to load
8453 standard_80387_constant_opcode (rtx x
)
8455 switch (standard_80387_constant_p (x
))
8479 /* Return the CONST_DOUBLE representing the 80387 constant that is
8480 loaded by the specified special instruction. The argument IDX
8481 matches the return value from standard_80387_constant_p. */
8484 standard_80387_constant_rtx (int idx
)
8488 if (! ext_80387_constants_init
)
8489 init_ext_80387_constants ();
8505 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8509 /* Return 1 if X is all 0s and 2 if x is all 1s
8510 in supported SSE/AVX vector mode. */
8513 standard_sse_constant_p (rtx x
)
8515 enum machine_mode mode
= GET_MODE (x
);
8517 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
8519 if (vector_all_ones_operand (x
, mode
))
8541 /* Return the opcode of the special instruction to be used to load
8545 standard_sse_constant_opcode (rtx insn
, rtx x
)
8547 switch (standard_sse_constant_p (x
))
8550 switch (get_attr_mode (insn
))
8553 return "%vpxor\t%0, %d0";
8555 return "%vxorpd\t%0, %d0";
8557 return "%vxorps\t%0, %d0";
8560 return "vpxor\t%x0, %x0, %x0";
8562 return "vxorpd\t%x0, %x0, %x0";
8564 return "vxorps\t%x0, %x0, %x0";
8572 return "vpcmpeqd\t%0, %0, %0";
8574 return "pcmpeqd\t%0, %0";
/* Returns true if OP contains a symbol reference.  */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return true;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return true;
    }

  return false;
}
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return false;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return false;

  ix86_compute_frame_layout (&frame);
  return (frame.stack_pointer_offset == UNITS_PER_WORD
	  && (frame.nregs + frame.nsseregs) == 0);
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires a valid frame pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* For Win64 SEH, very large frames need a frame pointer as the maximum
     stack allocation is 4GB.  */
  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!crtl->is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
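/* A sketch of what one of these thunks looks like in 32-bit PIC code
   (assuming the %ebx flavor); ix86_code_end below emits the thunk body
   and output_set_got emits the call:

       __x86.get_pc_thunk.bx:
	       movl	(%esp), %ebx		# load the return address
	       ret

       call	__x86.get_pc_thunk.bx
       addl	$_GLOBAL_OFFSET_TABLE_, %ebx	# %ebx now points at the GOT  */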
8709 /* This function generates code for -fpic that loads %ebx with
8710 the return address of the caller and then returns. */
8713 ix86_code_end (void)
8718 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
8723 if (!(pic_labels_used
& (1 << regno
)))
8726 get_pc_thunk_name (name
, regno
);
8728 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
8729 get_identifier (name
),
8730 build_function_type_list (void_type_node
, NULL_TREE
));
8731 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
8732 NULL_TREE
, void_type_node
);
8733 TREE_PUBLIC (decl
) = 1;
8734 TREE_STATIC (decl
) = 1;
8735 DECL_IGNORED_P (decl
) = 1;
8740 switch_to_section (darwin_sections
[text_coal_section
]);
8741 fputs ("\t.weak_definition\t", asm_out_file
);
8742 assemble_name (asm_out_file
, name
);
8743 fputs ("\n\t.private_extern\t", asm_out_file
);
8744 assemble_name (asm_out_file
, name
);
8745 putc ('\n', asm_out_file
);
8746 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8747 DECL_WEAK (decl
) = 1;
8751 if (USE_HIDDEN_LINKONCE
)
8753 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
8755 targetm
.asm_out
.unique_section (decl
, 0);
8756 switch_to_section (get_named_section (decl
, NULL
, 0));
8758 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
8759 fputs ("\t.hidden\t", asm_out_file
);
8760 assemble_name (asm_out_file
, name
);
8761 putc ('\n', asm_out_file
);
8762 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
8766 switch_to_section (text_section
);
8767 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8770 DECL_INITIAL (decl
) = make_node (BLOCK
);
8771 current_function_decl
= decl
;
8772 init_function_start (decl
);
8773 first_function_block_is_cold
= false;
8774 /* Make sure unwind info is emitted for the thunk if needed. */
8775 final_start_function (emit_barrier (), asm_out_file
, 1);
8777 /* Pad stack IP move with 4 instructions (two NOPs count
8778 as one instruction). */
8779 if (TARGET_PAD_SHORT_FUNCTION
)
8784 fputs ("\tnop\n", asm_out_file
);
8787 xops
[0] = gen_rtx_REG (Pmode
, regno
);
8788 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
8789 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
8790 fputs ("\tret\n", asm_out_file
);
8791 final_end_function ();
8792 init_insn_lengths ();
8793 free_after_compilation (cfun
);
8795 current_function_decl
= NULL
;
8798 if (flag_split_stack
)
8799 file_end_indicate_split_stack ();
8802 /* Emit code for the SET_GOT patterns. */
8805 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
8811 if (TARGET_VXWORKS_RTP
&& flag_pic
)
8813 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8814 xops
[2] = gen_rtx_MEM (Pmode
,
8815 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
8816 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
8818 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8819 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8820 an unadorned address. */
8821 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8822 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
8823 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
8827 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
8831 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
8833 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
8836 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8837 is what will be referenced by the Mach-O PIC subsystem. */
8839 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8842 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8843 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
8848 get_pc_thunk_name (name
, REGNO (dest
));
8849 pic_labels_used
|= 1 << REGNO (dest
);
8851 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
8852 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
8853 output_asm_insn ("call\t%X2", xops
);
8854 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8855 is what will be referenced by the Mach-O PIC subsystem. */
8858 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8860 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8861 CODE_LABEL_NUMBER (label
));
8866 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}

/* Generate a "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
		      arg,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_POST_INC (Pmode,
						     stack_pointer_rtx)));
}
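/* For example, on x86-64 gen_push of %rdi yields the RTL
       (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di))
   and gen_pop the mirror image
       (set (reg:DI di) (mem:DI (post_inc:DI (reg:DI sp))))
   which the move patterns emit as plain push/pop instructions.  */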
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (crtl->is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;

      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
	drap = REGNO (crtl->drap_reg);
      else
	drap = -1;

      for (i = 2; i >= 0; --i)
	if (i != drap && !df_regs_ever_live_p (i))
	  return i;
    }

  return INVALID_REGNUM;
}
/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->calls_eh_return
	  || crtl->uses_const_pool))
    return ix86_select_alt_pic_regnum () == INVALID_REGNUM;

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;

      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
    return true;

  return (df_regs_ever_live_p (regno)
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}

/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
	     && to == HARD_FRAME_POINTER_REGNUM)
	    || (from == FRAME_POINTER_REGNUM
		&& to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;

  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
9057 /* Fill structure ix86_frame about frame of currently computed function. */
9060 ix86_compute_frame_layout (struct ix86_frame
*frame
)
9062 unsigned HOST_WIDE_INT stack_alignment_needed
;
9063 HOST_WIDE_INT offset
;
9064 unsigned HOST_WIDE_INT preferred_alignment
;
9065 HOST_WIDE_INT size
= get_frame_size ();
9066 HOST_WIDE_INT to_allocate
;
9068 frame
->nregs
= ix86_nsaved_regs ();
9069 frame
->nsseregs
= ix86_nsaved_sseregs ();
9071 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
9072 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
  /* The 64-bit MS ABI seems to require stack alignment to always be 16,
     except for function prologues and leaf functions.  */
9076 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
9077 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
9078 || ix86_current_function_calls_tls_descriptor
))
9080 preferred_alignment
= 16;
9081 stack_alignment_needed
= 16;
9082 crtl
->preferred_stack_boundary
= 128;
9083 crtl
->stack_alignment_needed
= 128;
9086 gcc_assert (!size
|| stack_alignment_needed
);
9087 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
9088 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
9090 /* For SEH we have to limit the amount of code movement into the prologue.
9091 At present we do this via a BLOCKAGE, at which point there's very little
9092 scheduling that can be done, which means that there's very little point
9093 in doing anything except PUSHs. */
9095 cfun
->machine
->use_fast_prologue_epilogue
= false;
9097 /* During reload iteration the amount of registers saved can change.
9098 Recompute the value as needed. Do not recompute when amount of registers
9099 didn't change as reload does multiple calls to the function and does not
9100 expect the decision to change within single iteration. */
9101 else if (!optimize_function_for_size_p (cfun
)
9102 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
9104 int count
= frame
->nregs
;
9105 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
9107 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
9109 /* The fast prologue uses move instead of push to save registers. This
9110 is significantly longer, but also executes faster as modern hardware
9111 can execute the moves in parallel, but can't do that for push/pop.
9113 Be careful about choosing what prologue to emit: When function takes
9114 many instructions to execute we may use slow version as well as in
9115 case function is known to be outside hot spot (this is known with
9116 feedback only). Weight the size of function by number of registers
9117 to save as it is cheap to use one or two push instructions but very
9118 slow to use many of them. */
9120 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
9121 if (node
->frequency
< NODE_FREQUENCY_NORMAL
9122 || (flag_branch_probabilities
9123 && node
->frequency
< NODE_FREQUENCY_HOT
))
9124 cfun
->machine
->use_fast_prologue_epilogue
= false;
9126 cfun
->machine
->use_fast_prologue_epilogue
9127 = !expensive_function_p (count
);
9130 frame
->save_regs_using_mov
9131 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
9132 /* If static stack checking is enabled and done with probes,
9133 the registers need to be saved before allocating the frame. */
9134 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
9136 /* Skip return address. */
9137 offset
= UNITS_PER_WORD
;
9139 /* Skip pushed static chain. */
9140 if (ix86_static_chain_on_stack
)
9141 offset
+= UNITS_PER_WORD
;
9143 /* Skip saved base pointer. */
9144 if (frame_pointer_needed
)
9145 offset
+= UNITS_PER_WORD
;
9146 frame
->hfp_save_offset
= offset
;
9148 /* The traditional frame pointer location is at the top of the frame. */
9149 frame
->hard_frame_pointer_offset
= offset
;
9151 /* Register save area */
9152 offset
+= frame
->nregs
* UNITS_PER_WORD
;
9153 frame
->reg_save_offset
= offset
;
9155 /* On SEH target, registers are pushed just before the frame pointer
9158 frame
->hard_frame_pointer_offset
= offset
;
9160 /* Align and set SSE register save area. */
9161 if (frame
->nsseregs
)
9163 /* The only ABI that has saved SSE registers (Win64) also has a
9164 16-byte aligned default stack, and thus we don't need to be
9165 within the re-aligned local stack frame to save them. */
9166 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
9167 offset
= (offset
+ 16 - 1) & -16;
9168 offset
+= frame
->nsseregs
* 16;
9170 frame
->sse_reg_save_offset
= offset
;
9172 /* The re-aligned stack starts here. Values before this point are not
9173 directly comparable with values below this point. In order to make
9174 sure that no value happens to be the same before and after, force
9175 the alignment computation below to add a non-zero value. */
9176 if (stack_realign_fp
)
9177 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
9180 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
9181 offset
+= frame
->va_arg_size
;
9183 /* Align start of frame for local function. */
9184 if (stack_realign_fp
9185 || offset
!= frame
->sse_reg_save_offset
9188 || cfun
->calls_alloca
9189 || ix86_current_function_calls_tls_descriptor
)
9190 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
9192 /* Frame pointer points here. */
9193 frame
->frame_pointer_offset
= offset
;
9197 /* Add outgoing arguments area. Can be skipped if we eliminated
9198 all the function calls as dead code.
9199 Skipping is however impossible when function calls alloca. Alloca
9200 expander assumes that last crtl->outgoing_args_size
9201 of stack frame are unused. */
9202 if (ACCUMULATE_OUTGOING_ARGS
9203 && (!crtl
->is_leaf
|| cfun
->calls_alloca
9204 || ix86_current_function_calls_tls_descriptor
))
9206 offset
+= crtl
->outgoing_args_size
;
9207 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
9210 frame
->outgoing_arguments_size
= 0;
9212 /* Align stack boundary. Only needed if we're calling another function
9214 if (!crtl
->is_leaf
|| cfun
->calls_alloca
9215 || ix86_current_function_calls_tls_descriptor
)
9216 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
9218 /* We've reached end of stack frame. */
9219 frame
->stack_pointer_offset
= offset
;
9221 /* Size prologue needs to allocate. */
9222 to_allocate
= offset
- frame
->sse_reg_save_offset
;
9224 if ((!to_allocate
&& frame
->nregs
<= 1)
9225 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
9226 frame
->save_regs_using_mov
= false;
9228 if (ix86_using_red_zone ()
9229 && crtl
->sp_is_unchanging
9231 && !ix86_current_function_calls_tls_descriptor
)
9233 frame
->red_zone_size
= to_allocate
;
9234 if (frame
->save_regs_using_mov
)
9235 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9236 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9237 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9240 frame
->red_zone_size
= 0;
9241 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9243 /* The SEH frame pointer location is near the bottom of the frame.
9244 This is enforced by the fact that the difference between the
9245 stack pointer and the frame pointer is limited to 240 bytes in
9246 the unwind data structure. */
9251 /* If we can leave the frame pointer where it is, do so. Also, returns
9252 the establisher frame for __builtin_frame_address (0). */
9253 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9254 if (diff
<= SEH_MAX_FRAME_SIZE
9255 && (diff
> 240 || (diff
& 15) != 0)
9256 && !crtl
->accesses_prior_frames
)
9258 /* Ideally we'd determine what portion of the local stack frame
9259 (within the constraint of the lowest 240) is most heavily used.
9260 But without that complication, simply bias the frame pointer
9261 by 128 bytes so as to maximize the amount of the local stack
9262 frame that is addressable with 8-bit offsets. */
9263 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static inline int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len += 1;

  return len;
}
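/* In other words, the value returned is the number of extra address bytes
   the addressing mode will need: 0 for a plain (%reg), 1 for a disp8 form
   such as -8(%rbp) or for a zero offset from EBP/R13 (which always need at
   least a disp8), and 4 for a full disp32; ESP/R12 additionally pay one
   byte for the mandatory SIB byte.  */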
9292 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9293 The valid base registers are taken from CFUN->MACHINE->FS. */
9296 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9298 const struct machine_function
*m
= cfun
->machine
;
9299 rtx base_reg
= NULL
;
9300 HOST_WIDE_INT base_offset
= 0;
9302 if (m
->use_fast_prologue_epilogue
)
9304 /* Choose the base register most likely to allow the most scheduling
9305 opportunities. Generally FP is valid throughout the function,
9306 while DRAP must be reloaded within the epilogue. But choose either
9307 over the SP due to increased encoding size. */
9311 base_reg
= hard_frame_pointer_rtx
;
9312 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9314 else if (m
->fs
.drap_valid
)
9316 base_reg
= crtl
->drap_reg
;
9317 base_offset
= 0 - cfa_offset
;
9319 else if (m
->fs
.sp_valid
)
9321 base_reg
= stack_pointer_rtx
;
9322 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9327 HOST_WIDE_INT toffset
;
9330 /* Choose the base register with the smallest address encoding.
9331 With a tie, choose FP > DRAP > SP. */
9334 base_reg
= stack_pointer_rtx
;
9335 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9336 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9338 if (m
->fs
.drap_valid
)
9340 toffset
= 0 - cfa_offset
;
9341 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9344 base_reg
= crtl
->drap_reg
;
9345 base_offset
= toffset
;
9351 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9352 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9355 base_reg
= hard_frame_pointer_rtx
;
9356 base_offset
= toffset
;
9361 gcc_assert (base_reg
!= NULL
);
9363 return plus_constant (Pmode
, base_reg
, base_offset
);
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
9382 /* Emit a single register save at CFA - CFA_OFFSET. */
9385 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9386 HOST_WIDE_INT cfa_offset
)
9388 struct machine_function
*m
= cfun
->machine
;
9389 rtx reg
= gen_rtx_REG (mode
, regno
);
9390 rtx mem
, addr
, base
, insn
;
9392 addr
= choose_baseaddr (cfa_offset
);
9393 mem
= gen_frame_mem (mode
, addr
);
9395 /* For SSE saves, we need to indicate the 128-bit alignment. */
9396 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9398 insn
= emit_move_insn (mem
, reg
);
9399 RTX_FRAME_RELATED_P (insn
) = 1;
9402 if (GET_CODE (base
) == PLUS
)
9403 base
= XEXP (base
, 0);
9404 gcc_checking_assert (REG_P (base
));
9406 /* When saving registers into a re-aligned local stack frame, avoid
9407 any tricky guessing by dwarf2out. */
9408 if (m
->fs
.realigned
)
9410 gcc_checking_assert (stack_realign_drap
);
9412 if (regno
== REGNO (crtl
->drap_reg
))
9414 /* A bit of a hack. We force the DRAP register to be saved in
9415 the re-aligned stack frame, which provides us with a copy
9416 of the CFA that will last past the prologue. Install it. */
9417 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9418 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9419 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9420 mem
= gen_rtx_MEM (mode
, addr
);
9421 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9425 /* The frame pointer is a stable reference within the
9426 aligned frame. Use it. */
9427 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9428 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9429 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9430 mem
= gen_rtx_MEM (mode
, addr
);
9431 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9432 gen_rtx_SET (VOIDmode
, mem
, reg
));
9436 /* The memory may not be relative to the current CFA register,
9437 which means that we may need to generate a new pattern for
9438 use by the unwind info. */
9439 else if (base
!= m
->fs
.cfa_reg
)
9441 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
9442 m
->fs
.cfa_offset
- cfa_offset
);
9443 mem
= gen_rtx_MEM (mode
, addr
);
9444 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
	cfa_offset -= UNITS_PER_WORD;
      }
}

/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
	cfa_offset -= 16;
      }
}
static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within the stack red-zone till return, as unwinders can find the same
   value in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}

/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;

  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
9519 /* Expand prologue or epilogue stack adjustment.
9520 The pattern exist to put a dependency on all ebp-based memory accesses.
9521 STYLE should be negative if instructions should be marked as frame related,
9522 zero if %r11 register is live and cannot be freely used and positive
9526 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9527 int style
, bool set_cfa
)
9529 struct machine_function
*m
= cfun
->machine
;
9531 bool add_frame_related_expr
= false;
9533 if (Pmode
== SImode
)
9534 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9535 else if (x86_64_immediate_operand (offset
, DImode
))
9536 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
9540 /* r11 is used by indirect sibcall return as well, set before the
9541 epilogue and used after the epilogue. */
9543 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9546 gcc_assert (src
!= hard_frame_pointer_rtx
9547 && dest
!= hard_frame_pointer_rtx
);
9548 tmp
= hard_frame_pointer_rtx
;
9550 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9552 add_frame_related_expr
= true;
9554 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9557 insn
= emit_insn (insn
);
9559 ix86_add_queued_cfa_restore_notes (insn
);
9565 gcc_assert (m
->fs
.cfa_reg
== src
);
9566 m
->fs
.cfa_offset
+= INTVAL (offset
);
9567 m
->fs
.cfa_reg
= dest
;
9569 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9570 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9571 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9572 RTX_FRAME_RELATED_P (insn
) = 1;
9576 RTX_FRAME_RELATED_P (insn
) = 1;
9577 if (add_frame_related_expr
)
9579 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9580 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9581 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
9585 if (dest
== stack_pointer_rtx
)
9587 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9588 bool valid
= m
->fs
.sp_valid
;
9590 if (src
== hard_frame_pointer_rtx
)
9592 valid
= m
->fs
.fp_valid
;
9593 ooffset
= m
->fs
.fp_offset
;
9595 else if (src
== crtl
->drap_reg
)
9597 valid
= m
->fs
.drap_valid
;
9602 /* Else there are two possibilities: SP itself, which we set
9603 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9604 taken care of this by hand along the eh_return path. */
9605 gcc_checking_assert (src
== stack_pointer_rtx
9606 || offset
== const0_rtx
);
9609 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9610 m
->fs
.sp_valid
= valid
;
/* Find an available register to be used as a dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
	1. a parameter passing register.
9619 We reuse static-chain register if it is available. Otherwise, we
9620 use DI for i386 and R13 for x86-64. We chose R13 since it has
9623 Return: the regno of chosen register. */
9626 find_drap_reg (void)
9628 tree decl
= cfun
->decl
;
9632 /* Use R13 for nested function or function need static chain.
9633 Since function with tail call may use any caller-saved
9634 registers in epilogue, DRAP must not use caller-saved
9635 register in such case. */
9636 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9643 /* Use DI for nested function or function need static chain.
9644 Since function with tail call may use any caller-saved
9645 registers in epilogue, DRAP must not use caller-saved
9646 register in such case. */
9647 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9650 /* Reuse static chain register if it isn't used for parameter
9652 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
9654 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9655 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Prefer the one specified at command line.  */
  if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for the incoming stack boundary
     if -mstackrealign is used, this isn't a sibcall check and the
     estimated stack alignment is 128bit.  */
  else if (!sibcall
	   && !TARGET_64BIT
	   && ix86_force_align_arg_pointer
	   && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via the force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary.  */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for the register save
     area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and return vDRAP.  */
      unsigned int regno = find_drap_reg ();
      rtx arg_ptr, drap_vreg, seq, insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));

      add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
      RTX_FRAME_RELATED_P (insn) = 1;

      return drap_vreg;
    }
  else
    return NULL;
}

/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}
9768 struct scratch_reg
{
9773 /* Return a short-lived scratch register for use on function entry.
9774 In 32-bit mode, it is valid only after the registers are saved
9775 in the prologue. This register must be released by means of
9776 release_scratch_register_on_entry once it is dead. */
9779 get_scratch_register_on_entry (struct scratch_reg
*sr
)
9787 /* We always use R11 in 64-bit mode. */
9792 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
9794 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9795 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
9796 int regparm
= ix86_function_regparm (fntype
, decl
);
9798 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
9800 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9801 for the static chain register. */
9802 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
9803 && drap_regno
!= AX_REG
)
9805 else if (regparm
< 2 && drap_regno
!= DX_REG
)
9807 /* ecx is the static chain register. */
9808 else if (regparm
< 3 && !fastcall_p
&& !static_chain_p
9809 && drap_regno
!= CX_REG
)
9811 else if (ix86_save_reg (BX_REG
, true))
9813 /* esi is the static chain register. */
9814 else if (!(regparm
== 3 && static_chain_p
)
9815 && ix86_save_reg (SI_REG
, true))
9817 else if (ix86_save_reg (DI_REG
, true))
9821 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
9826 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
9829 rtx insn
= emit_insn (gen_push (sr
->reg
));
9830 RTX_FRAME_RELATED_P (insn
) = 1;
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  rtx x, insn = emit_insn (gen_pop (sr->reg));

  /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
  RTX_FRAME_RELATED_P (insn) = 1;
  x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
  x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
}
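/* The REG_FRAME_RELATED_EXPR note built above spells out the only effect
   the unwinder needs to know about: the pop bumps the stack pointer by
   UNITS_PER_WORD.  Without the note, dwarf2out would have no CFI
   description to attach to a pop instruction.  */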
9851 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9853 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9856 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;
9864 /* See if we have a constant small number of probes to generate. If so,
9865 that's the easy case. The run-time loop is made up of 11 insns in the
9866 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9867 for n # of intervals. */
9868 if (size
<= 5 * PROBE_INTERVAL
)
9870 HOST_WIDE_INT i
, adjust
;
9871 bool first_probe
= true;
9873 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9874 values of N from 1 until it exceeds SIZE. If only one probe is
9875 needed, this will not generate any code. Then adjust and probe
9876 to PROBE_INTERVAL + SIZE. */
9877 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9881 adjust
= 2 * PROBE_INTERVAL
+ dope
;
9882 first_probe
= false;
9885 adjust
= PROBE_INTERVAL
;
9887 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9888 plus_constant (Pmode
, stack_pointer_rtx
,
9890 emit_stack_probe (stack_pointer_rtx
);
9894 adjust
= size
+ PROBE_INTERVAL
+ dope
;
9896 adjust
= size
+ PROBE_INTERVAL
- i
;
9898 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9899 plus_constant (Pmode
, stack_pointer_rtx
,
9901 emit_stack_probe (stack_pointer_rtx
);
9903 /* Adjust back to account for the additional first interval. */
9904 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9905 plus_constant (Pmode
, stack_pointer_rtx
,
9906 PROBE_INTERVAL
+ dope
)));
9909 /* Otherwise, do the same as above, but in a loop. Note that we must be
9910 extra careful with variables wrapping around because we might be at
9911 the very top (or the very bottom) of the address space and we have
9912 to be able to handle this case properly; in particular, we use an
9913 equality test for the loop condition. */
9916 HOST_WIDE_INT rounded_size
;
9917 struct scratch_reg sr
;
9919 get_scratch_register_on_entry (&sr
);
9922 /* Step 1: round SIZE to the previous multiple of the interval. */
9924 rounded_size
= size
& -PROBE_INTERVAL
;
9927 /* Step 2: compute initial and final value of the loop counter. */
9929 /* SP = SP_0 + PROBE_INTERVAL. */
9930 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9931 plus_constant (Pmode
, stack_pointer_rtx
,
9932 - (PROBE_INTERVAL
+ dope
))));
9934 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9935 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
9936 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
9937 gen_rtx_PLUS (Pmode
, sr
.reg
,
9938 stack_pointer_rtx
)));
9943 while (SP != LAST_ADDR)
9945 SP = SP + PROBE_INTERVAL
9949 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9950 values of N from 1 until it is equal to ROUNDED_SIZE. */
9952 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
9955 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9956 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9958 if (size
!= rounded_size
)
9960 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9961 plus_constant (Pmode
, stack_pointer_rtx
,
9962 rounded_size
- size
)));
9963 emit_stack_probe (stack_pointer_rtx
);
9966 /* Adjust back to account for the additional first interval. */
9967 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9968 plus_constant (Pmode
, stack_pointer_rtx
,
9969 PROBE_INTERVAL
+ dope
)));
9971 release_scratch_register_on_entry (&sr
);
9974 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
9976 /* Even if the stack pointer isn't the CFA register, we need to correctly
9977 describe the adjustments made to it, in particular differentiate the
9978 frame-related ones from the frame-unrelated ones. */
9981 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
9982 XVECEXP (expr
, 0, 0)
9983 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9984 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
9985 XVECEXP (expr
, 0, 1)
9986 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9987 plus_constant (Pmode
, stack_pointer_rtx
,
9988 PROBE_INTERVAL
+ dope
+ size
));
9989 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
9990 RTX_FRAME_RELATED_P (last
) = 1;
9992 cfun
->machine
->fs
.sp_offset
+= size
;
9995 /* Make sure nothing is scheduled before we are done. */
9996 emit_insn (gen_blockage ());
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
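
/* Editorial example, not emitted verbatim: on a 64-bit ELF target with
   AT&T syntax and PROBE_INTERVAL == 4096, and with %r11 as REG, the
   routine above prints a loop of roughly this shape:

       .LPSRL0:
               cmpq    %r11, %rsp
               je      .LPSRE0
               subq    $4096, %rsp
               orq     $0, (%rsp)
               jmp     .LPSRL0
       .LPSRE0:

   Exact label names, suffixes and operand order depend on the target.  */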
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 7 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
         it exceeds SIZE.  If only one probe is needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
                                         -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
                                       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

           while (TEST_ADDR != LAST_ADDR)
             {
               TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
               probe at TEST_ADDR
             }

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        emit_stack_probe (plus_constant (Pmode,
                                         gen_rtx_PLUS (Pmode,
                                                       stack_pointer_rtx,
                                                       sr.reg),
                                         rounded_size - size));

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
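
/* Editorial example, not part of the build: assuming PROBE_INTERVAL == 4096,
   a call ix86_emit_probe_stack_range (first, 9000) with a constant size
   takes the unrolled branch above and emits exactly three probes, at
   sp - (first + 4096), sp - (first + 8192) and sp - (first + 9000); a size
   above 7 * 4096 instead goes through the run-time loop.  */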
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
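
/* Editorial note: the only difference from the loop printed by
   output_adjust_stack_and_probe is that the probe uses a two-register
   address, e.g. "orq $0, (%rsp,%r11)" in AT&T syntax on 64-bit targets,
   so the stack pointer itself is never moved while probing.  */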
/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
   to be generated in correct form.  */

ix86_finalize_stack_realign_flags (void)

  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10173 unsigned int stack_realign
= (incoming_stack_boundary
10175 ? crtl
->max_used_stack_slot_alignment
10176 : crtl
->stack_alignment_needed
));
  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
         update it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }
10186 /* If the only reason for frame_pointer_needed is that we conservatively
10187 assumed stack realignment might be needed, but in the end nothing that
10188 needed the stack alignment had been spilled, clear frame_pointer_needed
10189 and say we don't need stack realignment. */
10191 && !crtl
->need_drap
10192 && frame_pointer_needed
10194 && flag_omit_frame_pointer
10195 && crtl
->sp_is_unchanging
10196 && !ix86_current_function_calls_tls_descriptor
10197 && !crtl
->accesses_prior_frames
10198 && !cfun
->calls_alloca
10199 && !crtl
->calls_eh_return
10200 && !(flag_stack_check
&& STACK_CHECK_MOVING_SP
)
10201 && !ix86_frame_pointer_required ()
10202 && get_frame_size () == 0
10203 && ix86_nsaved_sseregs () == 0
10204 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
10206 HARD_REG_SET set_up_by_prologue
, prologue_used
;
10209 CLEAR_HARD_REG_SET (prologue_used
);
10210 CLEAR_HARD_REG_SET (set_up_by_prologue
);
10211 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
10212 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
10213 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
10214 HARD_FRAME_POINTER_REGNUM
);
10218 FOR_BB_INSNS (bb
, insn
)
10219 if (NONDEBUG_INSN_P (insn
)
10220 && requires_stack_frame_p (insn
, prologue_used
,
10221 set_up_by_prologue
))
10223 crtl
->stack_realign_needed
= stack_realign
;
10224 crtl
->stack_realign_finalized
= true;
10229 frame_pointer_needed
= false;
10230 stack_realign
= false;
10231 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
10232 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
10233 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
10234 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
10235 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
10236 df_finish_pass (true);
10237 df_scan_alloc (NULL
);
10239 df_compute_regs_ever_live (true);
10243 crtl
->stack_realign_needed
= stack_realign
;
10244 crtl
->stack_realign_finalized
= true;
10247 /* Expand the prologue into a bunch of separate insns. */
10250 ix86_expand_prologue (void)
10252 struct machine_function
*m
= cfun
->machine
;
10255 struct ix86_frame frame
;
10256 HOST_WIDE_INT allocate
;
10257 bool int_registers_saved
;
10258 bool sse_registers_saved
;
10260 ix86_finalize_stack_realign_flags ();
10262 /* DRAP should not coexist with stack_realign_fp */
10263 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10265 memset (&m
->fs
, 0, sizeof (m
->fs
));
10267 /* Initialize CFA state for before the prologue. */
10268 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10269 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10271 /* Track SP offset to the CFA. We continue tracking this after we've
10272 swapped the CFA register away from SP. In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
10274 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10275 m
->fs
.sp_valid
= true;
10277 ix86_compute_frame_layout (&frame
);
10279 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10281 /* We should have already generated an error for any use of
10282 ms_hook on a nested function. */
10283 gcc_checking_assert (!ix86_static_chain_on_stack
);
10285 /* Check if profiling is active and we shall use profiling before
10286 prologue variant. If so sorry. */
10287 if (crtl
->profile
&& flag_fentry
!= 0)
10288 sorry ("ms_hook_prologue attribute isn%'t compatible "
10289 "with -mfentry for 32-bit");
10291 /* In ix86_asm_output_function_label we emitted:
10292 8b ff movl.s %edi,%edi
10294 8b ec movl.s %esp,%ebp
10296 This matches the hookable function prologue in Win32 API
10297 functions in Microsoft Windows XP Service Pack 2 and newer.
10298 Wine uses this to enable Windows apps to hook the Win32 API
10299 functions provided by Wine.
10301 What that means is that we've already set up the frame pointer. */
10303 if (frame_pointer_needed
10304 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10308 /* We've decided to use the frame pointer already set up.
10309 Describe this to the unwinder by pretending that both
10310 push and mov insns happen right here.
10312 Putting the unwind info here at the end of the ms_hook
10313 is done so that we can make absolutely certain we get
10314 the required byte sequence at the start of the function,
10315 rather than relying on an assembler that can produce
10316 the exact encoding required.
10318 However it does mean (in the unpatched case) that we have
10319 a 1 insn window where the asynchronous unwind info is
10320 incorrect. However, if we placed the unwind info at
10321 its correct location we would have incorrect unwind info
10322 in the patched case. Which is probably all moot since
10323 I don't expect Wine generates dwarf2 unwind info for the
10324 system libraries that use this feature. */
10326 insn
= emit_insn (gen_blockage ());
10328 push
= gen_push (hard_frame_pointer_rtx
);
10329 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10330 stack_pointer_rtx
);
10331 RTX_FRAME_RELATED_P (push
) = 1;
10332 RTX_FRAME_RELATED_P (mov
) = 1;
10334 RTX_FRAME_RELATED_P (insn
) = 1;
10335 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10336 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10338 /* Note that gen_push incremented m->fs.cfa_offset, even
10339 though we didn't emit the push insn here. */
10340 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10341 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10342 m
->fs
.fp_valid
= true;
10346 /* The frame pointer is not needed so pop %ebp again.
10347 This leaves us with a pristine state. */
10348 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10352 /* The first insn of a function that accepts its static chain on the
10353 stack is to push the register that would be filled in by a direct
10354 call. This insn will be skipped by the trampoline. */
10355 else if (ix86_static_chain_on_stack
)
10357 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10358 emit_insn (gen_blockage ());
10360 /* We don't want to interpret this push insn as a register save,
10361 only as a stack adjustment. The real copy of the register as
10362 a save will be done later, if needed. */
10363 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
10364 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10365 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10366 RTX_FRAME_RELATED_P (insn
) = 1;
  /* Emit prologue code to adjust stack alignment and set up DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
10371 if (stack_realign_drap
)
10373 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10375 /* Only need to push parameter pointer reg if it is caller saved. */
10376 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10378 /* Push arg pointer reg */
10379 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10380 RTX_FRAME_RELATED_P (insn
) = 1;
10383 /* Grab the argument pointer. */
10384 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
10385 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10386 RTX_FRAME_RELATED_P (insn
) = 1;
10387 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10388 m
->fs
.cfa_offset
= 0;
10390 /* Align the stack. */
10391 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10393 GEN_INT (-align_bytes
)));
10394 RTX_FRAME_RELATED_P (insn
) = 1;
10396 /* Replicate the return address on the stack so that return
10397 address can be reached via (argp - 1) slot. This is needed
10398 to implement macro RETURN_ADDR_RTX and intrinsic function
10399 expand_builtin_return_addr etc. */
10400 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
10401 t
= gen_frame_mem (word_mode
, t
);
10402 insn
= emit_insn (gen_push (t
));
10403 RTX_FRAME_RELATED_P (insn
) = 1;
10405 /* For the purposes of frame and register save area addressing,
10406 we've started over with a new frame. */
10407 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10408 m
->fs
.realigned
= true;
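
      /* Editorial sketch of the DRAP sequence built above, assuming 32-bit
         code, a 32-byte stack_alignment_needed, no static chain push and
         %ecx chosen as the DRAP register:

             leal    4(%esp), %ecx
             andl    $-32, %esp
             pushl   -4(%ecx)

         i.e. grab the incoming argument pointer, align the stack, then
         re-push the return address so it is reachable at (argp - 1).  */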
10411 int_registers_saved
= (frame
.nregs
== 0);
10412 sse_registers_saved
= (frame
.nsseregs
== 0);
10414 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10416 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10417 slower on all targets. Also sdb doesn't like it. */
10418 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10419 RTX_FRAME_RELATED_P (insn
) = 1;
10421 /* Push registers now, before setting the frame pointer
10423 if (!int_registers_saved
10425 && !frame
.save_regs_using_mov
)
10427 ix86_emit_save_regs ();
10428 int_registers_saved
= true;
10429 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10432 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10434 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10435 RTX_FRAME_RELATED_P (insn
) = 1;
10437 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10438 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10439 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10440 m
->fs
.fp_valid
= true;
10444 if (!int_registers_saved
)
10446 /* If saving registers via PUSH, do so now. */
10447 if (!frame
.save_regs_using_mov
)
10449 ix86_emit_save_regs ();
10450 int_registers_saved
= true;
10451 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10454 /* When using red zone we may start register saving before allocating
10455 the stack frame saving one cycle of the prologue. However, avoid
10456 doing this if we have to probe the stack; at least on x86_64 the
10457 stack probe can turn into a call that clobbers a red zone location. */
10458 else if (ix86_using_red_zone ()
10459 && (! TARGET_STACK_PROBE
10460 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10462 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10463 int_registers_saved
= true;
10467 if (stack_realign_fp
)
10469 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10470 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10472 /* The computation of the size of the re-aligned stack frame means
10473 that we must allocate the size of the register save area before
10474 performing the actual alignment. Otherwise we cannot guarantee
10475 that there's enough storage above the realignment point. */
10476 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10477 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10478 GEN_INT (m
->fs
.sp_offset
10479 - frame
.sse_reg_save_offset
),
10482 /* Align the stack. */
10483 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10485 GEN_INT (-align_bytes
)));
10487 /* For the purposes of register save area addressing, the stack
10488 pointer is no longer valid. As for the value of sp_offset,
10489 see ix86_compute_frame_layout, which we need to match in order
10490 to pass verification of stack_pointer_offset at the end. */
10491 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10492 m
->fs
.sp_valid
= false;
10495 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10497 if (flag_stack_usage_info
)
10499 /* We start to count from ARG_POINTER. */
10500 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10502 /* If it was realigned, take into account the fake frame. */
10503 if (stack_realign_drap
)
10505 if (ix86_static_chain_on_stack
)
10506 stack_size
+= UNITS_PER_WORD
;
10508 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10509 stack_size
+= UNITS_PER_WORD
;
10511 /* This over-estimates by 1 minimal-stack-alignment-unit but
10512 mitigates that by counting in the new return address slot. */
10513 current_function_dynamic_stack_size
10514 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10517 current_function_static_stack_size
= stack_size
;
10520 /* On SEH target with very large frame size, allocate an area to save
10521 SSE registers (as the very large allocation won't be described). */
10523 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
10524 && !sse_registers_saved
)
10526 HOST_WIDE_INT sse_size
=
10527 frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
10529 gcc_assert (int_registers_saved
);
10531 /* No need to do stack checking as the area will be immediately
10533 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10534 GEN_INT (-sse_size
), -1,
10535 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10536 allocate
-= sse_size
;
10537 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10538 sse_registers_saved
= true;
10541 /* The stack has already been decremented by the instruction calling us
10542 so probe if the size is non-negative to preserve the protection area. */
10543 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10545 /* We expect the registers to be saved when probes are used. */
10546 gcc_assert (int_registers_saved
);
10548 if (STACK_CHECK_MOVING_SP
)
10550 ix86_adjust_stack_and_probe (allocate
);
10555 HOST_WIDE_INT size
= allocate
;
10557 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10558 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10560 if (TARGET_STACK_PROBE
)
10561 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10563 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
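
          /* Editorial example: with STACK_CHECK_PROTECT == 12288 and
             size == 20000, the TARGET_STACK_PROBE variant probes down to
             32288 bytes below the incoming stack pointer, while the other
             variant starts 12288 bytes down and stops at the same point,
             leaving the protection area itself untouched.  */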
10569 else if (!ix86_target_stack_probe ()
10570 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10572 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10573 GEN_INT (-allocate
), -1,
10574 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10578 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10580 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10582 bool eax_live
= false;
10583 bool r10_live
= false;
10586 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10587 if (!TARGET_64BIT_MS_ABI
)
10588 eax_live
= ix86_eax_live_at_start_p ();
10592 emit_insn (gen_push (eax
));
10593 allocate
-= UNITS_PER_WORD
;
10597 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10598 emit_insn (gen_push (r10
));
10599 allocate
-= UNITS_PER_WORD
;
10602 emit_move_insn (eax
, GEN_INT (allocate
));
10603 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10605 /* Use the fact that AX still contains ALLOCATE. */
10606 adjust_stack_insn
= (Pmode
== DImode
10607 ? gen_pro_epilogue_adjust_stack_di_sub
10608 : gen_pro_epilogue_adjust_stack_si_sub
);
10610 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10611 stack_pointer_rtx
, eax
));
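
          /* Editorial sketch: on 64-bit Windows-style targets the worker
             insn typically expands to a call to the ___chkstk_ms helper,
             so the sequence built just above assembles to roughly

                 movl    $allocate, %eax
                 call    ___chkstk_ms
                 subq    %rax, %rsp

             with %eax still holding ALLOCATE for the final subtraction;
             other targets use a different worker, this is only one case.  */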
10613 /* Note that SEH directives need to continue tracking the stack
10614 pointer even after the frame pointer has been set up. */
10615 if (m
->fs
.cfa_reg
== stack_pointer_rtx
|| TARGET_SEH
)
10617 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10618 m
->fs
.cfa_offset
+= allocate
;
10620 RTX_FRAME_RELATED_P (insn
) = 1;
10621 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10622 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10623 plus_constant (Pmode
, stack_pointer_rtx
,
10626 m
->fs
.sp_offset
+= allocate
;
10628 if (r10_live
&& eax_live
)
10630 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10631 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
10632 gen_frame_mem (word_mode
, t
));
10633 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10634 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
10635 gen_frame_mem (word_mode
, t
));
10637 else if (eax_live
|| r10_live
)
10639 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10640 emit_move_insn (gen_rtx_REG (word_mode
,
10641 (eax_live
? AX_REG
: R10_REG
)),
10642 gen_frame_mem (word_mode
, t
));
10645 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
  /* If we haven't already set up the frame pointer, do so now.  */
10648 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10650 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10651 GEN_INT (frame
.stack_pointer_offset
10652 - frame
.hard_frame_pointer_offset
));
10653 insn
= emit_insn (insn
);
10654 RTX_FRAME_RELATED_P (insn
) = 1;
10655 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10657 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10658 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10659 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10660 m
->fs
.fp_valid
= true;
10663 if (!int_registers_saved
)
10664 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10665 if (!sse_registers_saved
)
10666 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10668 pic_reg_used
= false;
10669 if (pic_offset_table_rtx
10670 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
10673 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
10675 if (alt_pic_reg_used
!= INVALID_REGNUM
)
10676 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
10678 pic_reg_used
= true;
10685 if (ix86_cmodel
== CM_LARGE_PIC
)
10687 rtx label
, tmp_reg
;
10689 gcc_assert (Pmode
== DImode
);
10690 label
= gen_label_rtx ();
10691 emit_label (label
);
10692 LABEL_PRESERVE_P (label
) = 1;
10693 tmp_reg
= gen_rtx_REG (Pmode
, R11_REG
);
10694 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
10695 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
10697 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
10698 insn
= emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
10699 pic_offset_table_rtx
, tmp_reg
));
10702 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
10706 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
10707 RTX_FRAME_RELATED_P (insn
) = 1;
10708 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
10712 /* In the pic_reg_used case, make sure that the got load isn't deleted
10713 when mcount needs it. Blockage to avoid call movement across mcount
10714 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10716 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
10717 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
10719 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
10721 /* vDRAP is setup but after reload it turns out stack realign
10722 isn't necessary, here we will emit prologue to setup DRAP
10723 without stack realign adjustment */
10724 t
= choose_baseaddr (0);
10725 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10728 /* Prevent instructions from being scheduled into register save push
10729 sequence when access to the redzone area is done through frame pointer.
10730 The offset between the frame pointer and the stack pointer is calculated
10731 relative to the value of the stack pointer at the end of the function
10732 prologue, and moving instructions that access redzone area via frame
10733 pointer inside push sequence violates this assumption. */
10734 if (frame_pointer_needed
&& frame
.red_zone_size
)
10735 emit_insn (gen_memory_blockage ());
10737 /* Emit cld instruction if stringops are used in the function. */
10738 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
10739 emit_insn (gen_cld ());
10741 /* SEH requires that the prologue end within 256 bytes of the start of
10742 the function. Prevent instruction schedules that would extend that.
10743 Further, prevent alloca modifications to the stack pointer from being
10744 combined with prologue modifications. */
10746 emit_insn (gen_prologue_use (stack_pointer_rtx
));
10749 /* Emit code to restore REG using a POP insn. */
10752 ix86_emit_restore_reg_using_pop (rtx reg
)
10754 struct machine_function
*m
= cfun
->machine
;
10755 rtx insn
= emit_insn (gen_pop (reg
));
10757 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
10758 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10760 if (m
->fs
.cfa_reg
== crtl
->drap_reg
10761 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
10763 /* Previously we'd represented the CFA as an expression
10764 like *(%ebp - 8). We've just popped that value from
10765 the stack, which means we need to reset the CFA to
10766 the drap register. This will remain until we restore
10767 the stack pointer. */
10768 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10769 RTX_FRAME_RELATED_P (insn
) = 1;
10771 /* This means that the DRAP register is valid for addressing too. */
10772 m
->fs
.drap_valid
= true;
10776 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10778 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
10779 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10780 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
10781 RTX_FRAME_RELATED_P (insn
) = 1;
10783 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10786 /* When the frame pointer is the CFA, and we pop it, we are
10787 swapping back to the stack pointer as the CFA. This happens
10788 for stack frames that don't allocate other data, so we assume
10789 the stack pointer is now pointing at the return address, i.e.
10790 the function entry state, which makes the offset be 1 word. */
10791 if (reg
== hard_frame_pointer_rtx
)
10793 m
->fs
.fp_valid
= false;
10794 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10796 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10797 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10799 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10800 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10801 GEN_INT (m
->fs
.cfa_offset
)));
10802 RTX_FRAME_RELATED_P (insn
) = 1;
10807 /* Emit code to restore saved registers using POP insns. */
10810 ix86_emit_restore_regs_using_pop (void)
10812 unsigned int regno
;
10814 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10815 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
10816 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
10819 /* Emit code and notes for the LEAVE instruction. */
10822 ix86_emit_leave (void)
10824 struct machine_function
*m
= cfun
->machine
;
10825 rtx insn
= emit_insn (ix86_gen_leave ());
10827 ix86_add_queued_cfa_restore_notes (insn
);
10829 gcc_assert (m
->fs
.fp_valid
);
10830 m
->fs
.sp_valid
= true;
10831 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
10832 m
->fs
.fp_valid
= false;
10834 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10836 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10837 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
10839 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10840 plus_constant (Pmode
, stack_pointer_rtx
,
10842 RTX_FRAME_RELATED_P (insn
) = 1;
10844 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
10848 /* Emit code to restore saved registers using MOV insns.
10849 First register is restored from CFA - CFA_OFFSET. */
10851 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10852 bool maybe_eh_return
)
10854 struct machine_function
*m
= cfun
->machine
;
10855 unsigned int regno
;
10857 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10858 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10860 rtx reg
= gen_rtx_REG (word_mode
, regno
);
10863 mem
= choose_baseaddr (cfa_offset
);
10864 mem
= gen_frame_mem (word_mode
, mem
);
10865 insn
= emit_move_insn (reg
, mem
);
10867 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
10869 /* Previously we'd represented the CFA as an expression
10870 like *(%ebp - 8). We've just popped that value from
10871 the stack, which means we need to reset the CFA to
10872 the drap register. This will remain until we restore
10873 the stack pointer. */
10874 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10875 RTX_FRAME_RELATED_P (insn
) = 1;
10877 /* This means that the DRAP register is valid for addressing. */
10878 m
->fs
.drap_valid
= true;
10881 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10883 cfa_offset
-= UNITS_PER_WORD
;
10887 /* Emit code to restore saved registers using MOV insns.
10888 First register is restored from CFA - CFA_OFFSET. */
10890 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10891 bool maybe_eh_return
)
10893 unsigned int regno
;
10895 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10896 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10898 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
10901 mem
= choose_baseaddr (cfa_offset
);
10902 mem
= gen_rtx_MEM (V4SFmode
, mem
);
10903 set_mem_align (mem
, 128);
10904 emit_move_insn (reg
, mem
);
10906 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10912 /* Emit vzeroupper if needed. */
10915 ix86_maybe_emit_epilogue_vzeroupper (void)
10917 if (TARGET_VZEROUPPER
10918 && !TREE_THIS_VOLATILE (cfun
->decl
)
10919 && !cfun
->machine
->caller_return_avx256_p
)
10920 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256
)));
10923 /* Restore function stack, frame, and registers. */
10926 ix86_expand_epilogue (int style
)
10928 struct machine_function
*m
= cfun
->machine
;
10929 struct machine_frame_state frame_state_save
= m
->fs
;
10930 struct ix86_frame frame
;
10931 bool restore_regs_via_mov
;
10934 ix86_finalize_stack_realign_flags ();
10935 ix86_compute_frame_layout (&frame
);
10937 m
->fs
.sp_valid
= (!frame_pointer_needed
10938 || (crtl
->sp_is_unchanging
10939 && !stack_realign_fp
));
10940 gcc_assert (!m
->fs
.sp_valid
10941 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10943 /* The FP must be valid if the frame pointer is present. */
10944 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
10945 gcc_assert (!m
->fs
.fp_valid
10946 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
10948 /* We must have *some* valid pointer to the stack frame. */
10949 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
10951 /* The DRAP is never valid at this point. */
10952 gcc_assert (!m
->fs
.drap_valid
);
10954 /* See the comment about red zone and frame
10955 pointer usage in ix86_expand_prologue. */
10956 if (frame_pointer_needed
&& frame
.red_zone_size
)
10957 emit_insn (gen_memory_blockage ());
10959 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
10960 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
10962 /* Determine the CFA offset of the end of the red-zone. */
10963 m
->fs
.red_zone_offset
= 0;
10964 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
10966 /* The red-zone begins below the return address. */
10967 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
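
      /* Editorial note: on x86-64 with RED_ZONE_SIZE == 128 and
         UNITS_PER_WORD == 8 this makes red_zone_offset 136, i.e. the
         128-byte red zone plus the word holding the return address.  */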
10969 /* When the register save area is in the aligned portion of
10970 the stack, determine the maximum runtime displacement that
10971 matches up with the aligned frame. */
10972 if (stack_realign_drap
)
10973 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
10977 /* Special care must be taken for the normal return case of a function
10978 using eh_return: the eax and edx registers are marked as saved, but
10979 not restored along this path. Adjust the save location to match. */
10980 if (crtl
->calls_eh_return
&& style
!= 2)
10981 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
10983 /* EH_RETURN requires the use of moves to function properly. */
10984 if (crtl
->calls_eh_return
)
10985 restore_regs_via_mov
= true;
10986 /* SEH requires the use of pops to identify the epilogue. */
10987 else if (TARGET_SEH
)
10988 restore_regs_via_mov
= false;
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register since it's
     less work than reloading sp and popping the register.  */
10992 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
10993 restore_regs_via_mov
= true;
10994 else if (TARGET_EPILOGUE_USING_MOVE
10995 && cfun
->machine
->use_fast_prologue_epilogue
10996 && (frame
.nregs
> 1
10997 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
10998 restore_regs_via_mov
= true;
10999 else if (frame_pointer_needed
11001 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11002 restore_regs_via_mov
= true;
11003 else if (frame_pointer_needed
11004 && TARGET_USE_LEAVE
11005 && cfun
->machine
->use_fast_prologue_epilogue
11006 && frame
.nregs
== 1)
11007 restore_regs_via_mov
= true;
11009 restore_regs_via_mov
= false;
11011 if (restore_regs_via_mov
|| frame
.nsseregs
)
11013 /* Ensure that the entire register save area is addressable via
11014 the stack pointer, if we will restore via sp. */
11016 && m
->fs
.sp_offset
> 0x7fffffff
11017 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
11018 && (frame
.nsseregs
+ frame
.nregs
) != 0)
11020 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11021 GEN_INT (m
->fs
.sp_offset
11022 - frame
.sse_reg_save_offset
),
11024 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11028 /* If there are any SSE registers to restore, then we have to do it
11029 via moves, since there's obviously no pop for SSE regs. */
11030 if (frame
.nsseregs
)
11031 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
11034 if (restore_regs_via_mov
)
11039 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
11041 /* eh_return epilogues need %ecx added to the stack pointer. */
11044 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
11046 /* Stack align doesn't work with eh_return. */
11047 gcc_assert (!stack_realign_drap
);
      /* Neither do regparm nested functions.  */
11049 gcc_assert (!ix86_static_chain_on_stack
);
11051 if (frame_pointer_needed
)
11053 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
11054 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
11055 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
11057 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
11058 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
11060 /* Note that we use SA as a temporary CFA, as the return
11061 address is at the proper place relative to it. We
11062 pretend this happens at the FP restore insn because
11063 prior to this insn the FP would be stored at the wrong
11064 offset relative to SA, and after this insn we have no
11065 other reasonable register to use for the CFA. We don't
11066 bother resetting the CFA to the SP for the duration of
11067 the return insn. */
11068 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11069 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
11070 ix86_add_queued_cfa_restore_notes (insn
);
11071 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
11072 RTX_FRAME_RELATED_P (insn
) = 1;
11074 m
->fs
.cfa_reg
= sa
;
11075 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11076 m
->fs
.fp_valid
= false;
11078 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
11079 const0_rtx
, style
, false);
11083 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
11084 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
11085 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
11086 ix86_add_queued_cfa_restore_notes (insn
);
11088 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
11089 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
11091 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11092 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11093 plus_constant (Pmode
, stack_pointer_rtx
,
11095 RTX_FRAME_RELATED_P (insn
) = 1;
11098 m
->fs
.sp_offset
= UNITS_PER_WORD
;
11099 m
->fs
.sp_valid
= true;
11104 /* SEH requires that the function end with (1) a stack adjustment
11105 if necessary, (2) a sequence of pops, and (3) a return or
11106 jump instruction. Prevent insns from the function body from
11107 being scheduled into this sequence. */
      /* Prevent a catch region from being adjacent to the standard
         epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda nor
         several other flags that would be interesting to test are
         set up yet.  */
11114 if (flag_non_call_exceptions
)
11115 emit_insn (gen_nops (const1_rtx
));
11117 emit_insn (gen_blockage ());
11120 /* First step is to deallocate the stack frame so that we can
11121 pop the registers. Also do it on SEH target for very large
11122 frame as the emitted instructions aren't allowed by the ABI in
11124 if (!m
->fs
.sp_valid
11126 && (m
->fs
.sp_offset
- frame
.reg_save_offset
11127 >= SEH_MAX_FRAME_SIZE
)))
11129 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
11130 GEN_INT (m
->fs
.fp_offset
11131 - frame
.reg_save_offset
),
11134 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11136 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11137 GEN_INT (m
->fs
.sp_offset
11138 - frame
.reg_save_offset
),
11140 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11143 ix86_emit_restore_regs_using_pop ();
11146 /* If we used a stack pointer and haven't already got rid of it,
11148 if (m
->fs
.fp_valid
)
11150 /* If the stack pointer is valid and pointing at the frame
11151 pointer store address, then we only need a pop. */
11152 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
11153 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11154 /* Leave results in shorter dependency chains on CPUs that are
11155 able to grok it fast. */
11156 else if (TARGET_USE_LEAVE
11157 || optimize_function_for_size_p (cfun
)
11158 || !cfun
->machine
->use_fast_prologue_epilogue
)
11159 ix86_emit_leave ();
11162 pro_epilogue_adjust_stack (stack_pointer_rtx
,
11163 hard_frame_pointer_rtx
,
11164 const0_rtx
, style
, !using_drap
);
11165 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11171 int param_ptr_offset
= UNITS_PER_WORD
;
11174 gcc_assert (stack_realign_drap
);
11176 if (ix86_static_chain_on_stack
)
11177 param_ptr_offset
+= UNITS_PER_WORD
;
11178 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11179 param_ptr_offset
+= UNITS_PER_WORD
;
11181 insn
= emit_insn (gen_rtx_SET
11182 (VOIDmode
, stack_pointer_rtx
,
11183 gen_rtx_PLUS (Pmode
,
11185 GEN_INT (-param_ptr_offset
))));
11186 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11187 m
->fs
.cfa_offset
= param_ptr_offset
;
11188 m
->fs
.sp_offset
= param_ptr_offset
;
11189 m
->fs
.realigned
= false;
11191 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11192 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11193 GEN_INT (param_ptr_offset
)));
11194 RTX_FRAME_RELATED_P (insn
) = 1;
11196 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11197 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
11200 /* At this point the stack pointer must be valid, and we must have
11201 restored all of the registers. We may not have deallocated the
11202 entire stack frame. We've delayed this until now because it may
11203 be possible to merge the local stack deallocation with the
11204 deallocation forced by ix86_static_chain_on_stack. */
11205 gcc_assert (m
->fs
.sp_valid
);
11206 gcc_assert (!m
->fs
.fp_valid
);
11207 gcc_assert (!m
->fs
.realigned
);
11208 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
11210 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11211 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
11215 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11217 /* Sibcall epilogues don't want a return instruction. */
11220 m
->fs
= frame_state_save
;
11224 /* Emit vzeroupper if needed. */
11225 ix86_maybe_emit_epilogue_vzeroupper ();
11227 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
11229 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
11231 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11232 address, do explicit add, and jump indirectly to the caller. */
11234 if (crtl
->args
.pops_args
>= 65536)
11236 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
11239 /* There is no "pascal" calling convention in any 64bit ABI. */
11240 gcc_assert (!TARGET_64BIT
);
11242 insn
= emit_insn (gen_pop (ecx
));
11243 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11244 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11246 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
11247 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
11248 add_reg_note (insn
, REG_CFA_REGISTER
,
11249 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
11250 RTX_FRAME_RELATED_P (insn
) = 1;
11252 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11254 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
11257 emit_jump_insn (gen_simple_return_pop_internal (popc
));
11260 emit_jump_insn (gen_simple_return_internal ());
11262 /* Restore the state back to the state from the prologue,
11263 so that it's correct for the next epilogue. */
11264 m
->fs
= frame_state_save
;
11267 /* Reset from the function's potential modifications. */
11270 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11271 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
11273 if (pic_offset_table_rtx
)
11274 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
11276 /* Mach-O doesn't support labels at the end of objects, so if
11277 it looks like we might want one, insert a NOP. */
11279 rtx insn
= get_last_insn ();
11280 rtx deleted_debug_label
= NULL_RTX
;
11283 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
11285 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11286 notes only, instead set their CODE_LABEL_NUMBER to -1,
11287 otherwise there would be code generation differences
11288 in between -g and -g0. */
11289 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11290 deleted_debug_label
= insn
;
11291 insn
= PREV_INSN (insn
);
11296 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
11297 fputs ("\tnop\n", file
);
11298 else if (deleted_debug_label
)
11299 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
11300 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11301 CODE_LABEL_NUMBER (insn
) = -1;
11307 /* Return a scratch register to use in the split stack prologue. The
11308 split stack prologue is used for -fsplit-stack. It is the first
11309 instructions in the function, even before the regular prologue.
11310 The scratch register can be any caller-saved register which is not
11311 used for parameters or for the static chain. */
11313 static unsigned int
11314 split_stack_prologue_scratch_regno (void)
11323 is_fastcall
= (lookup_attribute ("fastcall",
11324 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11326 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
11330 if (DECL_STATIC_CHAIN (cfun
->decl
))
11332 sorry ("-fsplit-stack does not support fastcall with "
11333 "nested function");
11334 return INVALID_REGNUM
;
11338 else if (regparm
< 3)
11340 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11346 sorry ("-fsplit-stack does not support 2 register "
                     "parameters for a nested function");
11348 return INVALID_REGNUM
;
11355 /* FIXME: We could make this work by pushing a register
11356 around the addition and comparison. */
11357 sorry ("-fsplit-stack does not support 3 register parameters");
11358 return INVALID_REGNUM
;
11363 /* A SYMBOL_REF for the function which allocates new stackspace for
11366 static GTY(()) rtx split_stack_fn
;
11368 /* A SYMBOL_REF for the more stack function when using the large
11371 static GTY(()) rtx split_stack_fn_large
;
11373 /* Handle -fsplit-stack. These are the first instructions in the
11374 function, even before the regular prologue. */
11377 ix86_expand_split_stack_prologue (void)
11379 struct ix86_frame frame
;
11380 HOST_WIDE_INT allocate
;
11381 unsigned HOST_WIDE_INT args_size
;
11382 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11383 rtx scratch_reg
= NULL_RTX
;
11384 rtx varargs_label
= NULL_RTX
;
11387 gcc_assert (flag_split_stack
&& reload_completed
);
11389 ix86_finalize_stack_realign_flags ();
11390 ix86_compute_frame_layout (&frame
);
11391 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11393 /* This is the label we will branch to if we have enough stack
11394 space. We expect the basic block reordering pass to reverse this
11395 branch if optimizing, so that we branch in the unlikely case. */
11396 label
= gen_label_rtx ();
11398 /* We need to compare the stack pointer minus the frame size with
11399 the stack boundary in the TCB. The stack boundary always gives
11400 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11401 can compare directly. Otherwise we need to do an addition. */
11403 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11404 UNSPEC_STACK_CHECK
);
11405 limit
= gen_rtx_CONST (Pmode
, limit
);
11406 limit
= gen_rtx_MEM (Pmode
, limit
);
11407 if (allocate
< SPLIT_STACK_AVAILABLE
)
11408 current
= stack_pointer_rtx
;
11411 unsigned int scratch_regno
;
11414 /* We need a scratch register to hold the stack pointer minus
11415 the required frame size. Since this is the very start of the
11416 function, the scratch register can be any caller-saved
11417 register which is not used for parameters. */
11418 offset
= GEN_INT (- allocate
);
11419 scratch_regno
= split_stack_prologue_scratch_regno ();
11420 if (scratch_regno
== INVALID_REGNUM
)
11422 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11423 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11425 /* We don't use ix86_gen_add3 in this case because it will
11426 want to split to lea, but when not optimizing the insn
11427 will not be split after this point. */
11428 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11429 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11434 emit_move_insn (scratch_reg
, offset
);
11435 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
11436 stack_pointer_rtx
));
11438 current
= scratch_reg
;
11441 ix86_expand_branch (GEU
, current
, limit
, label
);
11442 jump_insn
= get_last_insn ();
11443 JUMP_LABEL (jump_insn
) = label
;
11445 /* Mark the jump as very likely to be taken. */
11446 add_reg_note (jump_insn
, REG_BR_PROB
,
11447 GEN_INT (REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100));
11449 if (split_stack_fn
== NULL_RTX
)
11450 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11451 fn
= split_stack_fn
;
11453 /* Get more stack space. We pass in the desired stack space and the
11454 size of the arguments to copy to the new stack. In 32-bit mode
11455 we push the parameters; __morestack will return on a new stack
11456 anyhow. In 64-bit mode we pass the parameters in r10 and
11458 allocate_rtx
= GEN_INT (allocate
);
11459 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11460 call_fusage
= NULL_RTX
;
11465 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11466 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11468 /* If this function uses a static chain, it will be in %r10.
11469 Preserve it across the call to __morestack. */
11470 if (DECL_STATIC_CHAIN (cfun
->decl
))
11474 rax
= gen_rtx_REG (word_mode
, AX_REG
);
11475 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
11476 use_reg (&call_fusage
, rax
);
11479 if (ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11481 HOST_WIDE_INT argval
;
11483 gcc_assert (Pmode
== DImode
);
11484 /* When using the large model we need to load the address
11485 into a register, and we've run out of registers. So we
11486 switch to a different calling convention, and we call a
11487 different function: __morestack_large. We pass the
11488 argument size in the upper 32 bits of r10 and pass the
11489 frame size in the lower 32 bits. */
11490 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11491 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11493 if (split_stack_fn_large
== NULL_RTX
)
11494 split_stack_fn_large
=
11495 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11497 if (ix86_cmodel
== CM_LARGE_PIC
)
11501 label
= gen_label_rtx ();
11502 emit_label (label
);
11503 LABEL_PRESERVE_P (label
) = 1;
11504 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11505 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11506 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
11507 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11509 x
= gen_rtx_CONST (Pmode
, x
);
11510 emit_move_insn (reg11
, x
);
11511 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11512 x
= gen_const_mem (Pmode
, x
);
11513 emit_move_insn (reg11
, x
);
11516 emit_move_insn (reg11
, split_stack_fn_large
);
11520 argval
= ((args_size
<< 16) << 16) + allocate
;
11521 emit_move_insn (reg10
, GEN_INT (argval
));
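
      /* Editorial example: with args_size == 0x20 and allocate == 0x1000
         the double shift yields argval == 0x0000002000001000, i.e. the
         argument size in the upper 32 bits of %r10 and the frame size in
         the lower 32 bits, as described above.  */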
11525 emit_move_insn (reg10
, allocate_rtx
);
11526 emit_move_insn (reg11
, GEN_INT (args_size
));
11527 use_reg (&call_fusage
, reg11
);
11530 use_reg (&call_fusage
, reg10
);
11534 emit_insn (gen_push (GEN_INT (args_size
)));
11535 emit_insn (gen_push (allocate_rtx
));
11537 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11538 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11540 add_function_usage_to (call_insn
, call_fusage
);
11542 /* In order to make call/return prediction work right, we now need
11543 to execute a return instruction. See
11544 libgcc/config/i386/morestack.S for the details on how this works.
11546 For flow purposes gcc must not see this as a return
11547 instruction--we need control flow to continue at the subsequent
11548 label. Therefore, we use an unspec. */
11549 gcc_assert (crtl
->args
.pops_args
< 65536);
11550 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11552 /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
11554 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11555 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
11556 gen_rtx_REG (word_mode
, AX_REG
));
11558 /* If this function calls va_start, we need to store a pointer to
11559 the arguments on the old stack, because they may not have been
11560 all copied to the new stack. At this point the old stack can be
11561 found at the frame pointer value used by __morestack, because
11562 __morestack has set that up before calling back to us. Here we
11563 store that pointer in a scratch register, and in
11564 ix86_expand_prologue we store the scratch register in a stack
11566 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11568 unsigned int scratch_regno
;
11572 scratch_regno
= split_stack_prologue_scratch_regno ();
11573 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11574 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
11578 return address within this function
11579 return address of caller of this function
11581 So we add three words to get to the stack arguments.
11585 return address within this function
11586 first argument to __morestack
11587 second argument to __morestack
11588 return address of caller of this function
11590 So we add five words to get to the stack arguments.
11592 words
= TARGET_64BIT
? 3 : 5;
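
          /* Editorial example: in 64-bit mode the three words are the saved
             frame pointer, the return address within this function and the
             caller's return address, so the stack arguments start at
             %rbp + 24; in 32-bit mode the two pushed __morestack arguments
             add two more words, giving %ebp + 20.  */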
11593 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11594 gen_rtx_PLUS (Pmode
, frame_reg
,
11595 GEN_INT (words
* UNITS_PER_WORD
))));
11597 varargs_label
= gen_label_rtx ();
11598 emit_jump_insn (gen_jump (varargs_label
));
11599 JUMP_LABEL (get_last_insn ()) = varargs_label
;
11604 emit_label (label
);
11605 LABEL_NUSES (label
) = 1;
11607 /* If this function calls va_start, we now have to set the scratch
11608 register for the case where we do not call __morestack. In this
11609 case we need to set it based on the stack pointer. */
11610 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11612 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11613 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11614 GEN_INT (UNITS_PER_WORD
))));
11616 emit_label (varargs_label
);
11617 LABEL_NUSES (varargs_label
) = 1;
11621 /* We may have to tell the dataflow pass that the split stack prologue
11622 is initializing a scratch register. */
11625 ix86_live_on_entry (bitmap regs
)
11627 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11629 gcc_assert (flag_split_stack
);
11630 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11634 /* Determine if op is suitable SUBREG RTX for address. */
11637 ix86_address_subreg_operand (rtx op
)
11639 enum machine_mode mode
;
11644 mode
= GET_MODE (op
);
11646 if (GET_MODE_CLASS (mode
) != MODE_INT
)
11649 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11650 failures when the register is one word out of a two word structure. */
11651 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
11654 /* Allow only SUBREGs of non-eliminable hard registers. */
11655 return register_no_elim_operand (op
, mode
);
11658 /* Extract the parts of an RTL expression that is a valid memory address
11659 for an instruction. Return 0 if the structure of the address is
11660 grossly off. Return -1 if the address contains ASHIFT, so it is not
11661 strictly valid, but still used for computing length of lea instruction. */
11664 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11666 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11667 rtx base_reg
, index_reg
;
11668 HOST_WIDE_INT scale
= 1;
11669 rtx scale_rtx
= NULL_RTX
;
11672 enum ix86_address_seg seg
= SEG_DEFAULT
;
11674 /* Allow zero-extended SImode addresses,
11675 they will be emitted with addr32 prefix. */
11676 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
11678 if (GET_CODE (addr
) == ZERO_EXTEND
11679 && GET_MODE (XEXP (addr
, 0)) == SImode
)
11681 addr
= XEXP (addr
, 0);
11682 if (CONST_INT_P (addr
))
11685 else if (GET_CODE (addr
) == AND
11686 && const_32bit_mask (XEXP (addr
, 1), DImode
))
11688 addr
= XEXP (addr
, 0);
11690 /* Adjust SUBREGs. */
11691 if (GET_CODE (addr
) == SUBREG
11692 && GET_MODE (SUBREG_REG (addr
)) == SImode
)
11694 addr
= SUBREG_REG (addr
);
11695 if (CONST_INT_P (addr
))
11698 else if (GET_MODE (addr
) == DImode
)
11699 addr
= gen_rtx_SUBREG (SImode
, addr
, 0);
11700 else if (GET_MODE (addr
) != VOIDmode
)
11705 /* Allow SImode subregs of DImode addresses,
11706 they will be emitted with addr32 prefix. */
11707 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
11709 if (GET_CODE (addr
) == SUBREG
11710 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
11712 addr
= SUBREG_REG (addr
);
11713 if (CONST_INT_P (addr
))
11720 else if (GET_CODE (addr
) == SUBREG
)
11722 if (ix86_address_subreg_operand (SUBREG_REG (addr
)))
11727 else if (GET_CODE (addr
) == PLUS
)
11729 rtx addends
[4], op
;
11737 addends
[n
++] = XEXP (op
, 1);
11740 while (GET_CODE (op
) == PLUS
);
11745 for (i
= n
; i
>= 0; --i
)
11748 switch (GET_CODE (op
))
11753 index
= XEXP (op
, 0);
11754 scale_rtx
= XEXP (op
, 1);
11760 index
= XEXP (op
, 0);
11761 tmp
= XEXP (op
, 1);
11762 if (!CONST_INT_P (tmp
))
11764 scale
= INTVAL (tmp
);
11765 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11767 scale
= 1 << scale
;
11772 if (GET_CODE (op
) != UNSPEC
)
11777 if (XINT (op
, 1) == UNSPEC_TP
11778 && TARGET_TLS_DIRECT_SEG_REFS
11779 && seg
== SEG_DEFAULT
)
11780 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
11786 if (!ix86_address_subreg_operand (SUBREG_REG (op
)))
11813 else if (GET_CODE (addr
) == MULT
)
11815 index
= XEXP (addr
, 0); /* index*scale */
11816 scale_rtx
= XEXP (addr
, 1);
11818 else if (GET_CODE (addr
) == ASHIFT
)
11820 /* We're called for lea too, which implements ashift on occasion. */
11821 index
= XEXP (addr
, 0);
11822 tmp
= XEXP (addr
, 1);
11823 if (!CONST_INT_P (tmp
))
11825 scale
= INTVAL (tmp
);
11826 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11828 scale
= 1 << scale
;
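
  /* Editorial example: an address such as
     (plus (ashift (reg) (const_int 2)) (reg)) therefore decomposes with
     scale == 4, matching the SIB encoding base + index*4 used by lea.  */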
11831 else if (CONST_INT_P (addr
))
11833 if (!x86_64_immediate_operand (addr
, VOIDmode
))
11836 /* Constant addresses are sign extended to 64bit, we have to
11837 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11839 && val_signbit_known_set_p (SImode
, INTVAL (addr
)))
11845 disp
= addr
; /* displacement */
11851 else if (GET_CODE (index
) == SUBREG
11852 && ix86_address_subreg_operand (SUBREG_REG (index
)))
11858 /* Address override works only on the (%reg) part of %fs:(%reg). */
11859 if (seg
!= SEG_DEFAULT
11860 && ((base
&& GET_MODE (base
) != word_mode
)
11861 || (index
&& GET_MODE (index
) != word_mode
)))
11864 /* Extract the integral value of scale. */
11867 if (!CONST_INT_P (scale_rtx
))
11869 scale
= INTVAL (scale_rtx
);
11872 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
11873 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
11875 /* Avoid useless 0 displacement. */
11876 if (disp
== const0_rtx
&& (base
|| index
))
  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
11880 if (base_reg
&& index_reg
&& scale
== 1
11881 && (index_reg
== arg_pointer_rtx
11882 || index_reg
== frame_pointer_rtx
11883 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
11886 tmp
= base
, base
= index
, index
= tmp
;
11887 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
11890 /* Special case: %ebp cannot be encoded as a base without a displacement.
11894 && (base_reg
== hard_frame_pointer_rtx
11895 || base_reg
== frame_pointer_rtx
11896 || base_reg
== arg_pointer_rtx
11897 || (REG_P (base_reg
)
11898 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
11899 || REGNO (base_reg
) == R13_REG
))))
11902 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11903 Avoid this by transforming to [%esi+0].
11904 Reload calls address legitimization without cfun defined, so we need
11905 to test cfun for being non-NULL. */
11906 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
11907 && base_reg
&& !index_reg
&& !disp
11908 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
11911 /* Special case: encode reg+reg instead of reg*2. */
11912 if (!base
&& index
&& scale
== 2)
11913 base
= index
, base_reg
= index_reg
, scale
= 1;
11915 /* Special case: scaling cannot be encoded without base or displacement. */
11916 if (!base
&& !disp
&& index
&& scale
!= 1)
11920 out
->index
= index
;
11922 out
->scale
= scale
;
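/* Illustrative aside (not from the original sources): the decomposition
   performed above targets the canonical x86 effective-address form
   base + index*scale + disp, where the hardware can only encode a scale
   of 1, 2, 4 or 8.  A minimal standalone sketch of that arithmetic, with
   hypothetical helper names, might look like this.  */

static int
example_scale_encodable (long scale)
{
  /* Only 1, 2, 4 and 8 fit in the two scale bits of the SIB byte.  */
  return scale == 1 || scale == 2 || scale == 4 || scale == 8;
}

static long
example_effective_address (long base, long index, long scale, long disp)
{
  /* The canonical decomposition computed by ix86_decompose_address.  */
  return base + index * scale + disp;
}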
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */

static int
ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
11936 struct ix86_address parts
;
11938 int ok
= ix86_decompose_address (x
, &parts
);
11942 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
11943 parts
.base
= SUBREG_REG (parts
.base
);
11944 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
11945 parts
.index
= SUBREG_REG (parts
.index
);
11947 /* Attempt to minimize number of registers in the address. */
11949 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
11951 && (!REG_P (parts
.index
)
11952 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
11956 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
11958 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
11959 && parts
.base
!= parts
.index
)
/* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
   since its predecode logic can't detect the length of such instructions
   and decoding degenerates to the vector path.  Increase the cost of such
   addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
   to split such addresses or even refuse them altogether.

   The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

   The first and last cases may be avoidable by explicitly coding a zero
   displacement in the memory address, but I don't have an AMD-K6 machine
   handy to check this theory.  */
11978 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11979 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11980 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
11997 /* Determine if a given RTX is a valid constant. We already know this
11998 satisfies CONSTANT_P. */
12001 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
12003 switch (GET_CODE (x
))
12008 if (GET_CODE (x
) == PLUS
)
12010 if (!CONST_INT_P (XEXP (x
, 1)))
12015 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
12018 /* Only some unspecs are valid as "constants". */
12019 if (GET_CODE (x
) == UNSPEC
)
12020 switch (XINT (x
, 1))
12023 case UNSPEC_GOTOFF
:
12024 case UNSPEC_PLTOFF
:
12025 return TARGET_64BIT
;
12027 case UNSPEC_NTPOFF
:
12028 x
= XVECEXP (x
, 0, 0);
12029 return (GET_CODE (x
) == SYMBOL_REF
12030 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12031 case UNSPEC_DTPOFF
:
12032 x
= XVECEXP (x
, 0, 0);
12033 return (GET_CODE (x
) == SYMBOL_REF
12034 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
12039 /* We must have drilled down to a symbol. */
12040 if (GET_CODE (x
) == LABEL_REF
)
12042 if (GET_CODE (x
) != SYMBOL_REF
)
12047 /* TLS symbols are never valid. */
12048 if (SYMBOL_REF_TLS_MODEL (x
))
12051 /* DLLIMPORT symbols are never valid. */
12052 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12053 && SYMBOL_REF_DLLIMPORT_P (x
))
12057 /* mdynamic-no-pic */
12058 if (MACHO_DYNAMIC_NO_PIC_P
)
12059 return machopic_symbol_defined_p (x
);
12064 if (GET_MODE (x
) == TImode
12065 && x
!= CONST0_RTX (TImode
)
12071 if (!standard_sse_constant_p (x
))
12078 /* Otherwise we handle everything else in the move patterns. */
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !ix86_legitimate_constant_p (mode, x);
}
12104 /* Nonzero if the constant value X is a legitimate general operand
12105 when generating PIC code. It is given that flag_pic is on and
12106 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12109 legitimate_pic_operand_p (rtx x
)
12113 switch (GET_CODE (x
))
12116 inner
= XEXP (x
, 0);
12117 if (GET_CODE (inner
) == PLUS
12118 && CONST_INT_P (XEXP (inner
, 1)))
12119 inner
= XEXP (inner
, 0);
12121 /* Only some unspecs are valid as "constants". */
12122 if (GET_CODE (inner
) == UNSPEC
)
12123 switch (XINT (inner
, 1))
12126 case UNSPEC_GOTOFF
:
12127 case UNSPEC_PLTOFF
:
12128 return TARGET_64BIT
;
12130 x
= XVECEXP (inner
, 0, 0);
12131 return (GET_CODE (x
) == SYMBOL_REF
12132 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12133 case UNSPEC_MACHOPIC_OFFSET
:
12134 return legitimate_pic_address_disp_p (x
);
12142 return legitimate_pic_address_disp_p (x
);
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */
12153 legitimate_pic_address_disp_p (rtx disp
)
12157 /* In 64bit mode we can allow direct addresses of symbols and labels
12158 when they are not dynamic symbols. */
12161 rtx op0
= disp
, op1
;
12163 switch (GET_CODE (disp
))
12169 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
12171 op0
= XEXP (XEXP (disp
, 0), 0);
12172 op1
= XEXP (XEXP (disp
, 0), 1);
12173 if (!CONST_INT_P (op1
)
12174 || INTVAL (op1
) >= 16*1024*1024
12175 || INTVAL (op1
) < -16*1024*1024)
12177 if (GET_CODE (op0
) == LABEL_REF
)
12179 if (GET_CODE (op0
) == CONST
12180 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
12181 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
12183 if (GET_CODE (op0
) == UNSPEC
12184 && XINT (op0
, 1) == UNSPEC_PCREL
)
12186 if (GET_CODE (op0
) != SYMBOL_REF
)
12191 /* TLS references should always be enclosed in UNSPEC. */
12192 if (SYMBOL_REF_TLS_MODEL (op0
))
12194 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
12195 && ix86_cmodel
!= CM_LARGE_PIC
)
12203 if (GET_CODE (disp
) != CONST
)
12205 disp
= XEXP (disp
, 0);
/* We cannot safely allow PLUS expressions here.  This limits the allowed
   distance of GOT table references, but we should not need these anyway.  */
12211 if (GET_CODE (disp
) != UNSPEC
12212 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
12213 && XINT (disp
, 1) != UNSPEC_GOTOFF
12214 && XINT (disp
, 1) != UNSPEC_PCREL
12215 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
12218 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
12219 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
12225 if (GET_CODE (disp
) == PLUS
)
12227 if (!CONST_INT_P (XEXP (disp
, 1)))
12229 disp
= XEXP (disp
, 0);
12233 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
12236 if (GET_CODE (disp
) != UNSPEC
)
12239 switch (XINT (disp
, 1))
/* We need to check for both symbols and labels because VxWorks loads
   text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
   details.  */
return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	|| GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12249 case UNSPEC_GOTOFF
:
/* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
   The ABI also specifies a 32bit relocation, but we don't produce it
   in the small PIC model at all.  */
12253 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12254 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
12256 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
12258 case UNSPEC_GOTTPOFF
:
12259 case UNSPEC_GOTNTPOFF
:
12260 case UNSPEC_INDNTPOFF
:
12263 disp
= XVECEXP (disp
, 0, 0);
12264 return (GET_CODE (disp
) == SYMBOL_REF
12265 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
12266 case UNSPEC_NTPOFF
:
12267 disp
= XVECEXP (disp
, 0, 0);
12268 return (GET_CODE (disp
) == SYMBOL_REF
12269 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
12270 case UNSPEC_DTPOFF
:
12271 disp
= XVECEXP (disp
, 0, 0);
12272 return (GET_CODE (disp
) == SYMBOL_REF
12273 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
12279 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12280 replace the input X, or the original X if no replacement is called for.
12281 The output parameter *WIN is 1 if the calling macro should goto WIN,
12282 0 if it should not. */
12285 ix86_legitimize_reload_address (rtx x
,
12286 enum machine_mode mode ATTRIBUTE_UNUSED
,
12287 int opnum
, int type
,
12288 int ind_levels ATTRIBUTE_UNUSED
)
12290 /* Reload can generate:
12292 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12296 This RTX is rejected from ix86_legitimate_address_p due to
12297 non-strictness of base register 97. Following this rejection,
12298 reload pushes all three components into separate registers,
12299 creating invalid memory address RTX.
12301 Following code reloads only the invalid part of the
12302 memory address RTX. */
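/* Illustrative aside (not from the original sources): the shape being
   repaired here is a TLS address of the form

     (plus (plus (unspec [const0] UNSPEC_TP) (reg A)) (reg B))

   i.e. %fs:(A,B) in 64-bit mode.  Only whichever of A or B fails the
   strict base/index check is pushed through push_reload; the UNSPEC_TP
   part of the address is left untouched.  */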
12304 if (GET_CODE (x
) == PLUS
12305 && REG_P (XEXP (x
, 1))
12306 && GET_CODE (XEXP (x
, 0)) == PLUS
12307 && REG_P (XEXP (XEXP (x
, 0), 1)))
12310 bool something_reloaded
= false;
12312 base
= XEXP (XEXP (x
, 0), 1);
12313 if (!REG_OK_FOR_BASE_STRICT_P (base
))
12315 push_reload (base
, NULL_RTX
, &XEXP (XEXP (x
, 0), 1), NULL
,
12316 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12317 opnum
, (enum reload_type
) type
);
12318 something_reloaded
= true;
12321 index
= XEXP (x
, 1);
12322 if (!REG_OK_FOR_INDEX_STRICT_P (index
))
12324 push_reload (index
, NULL_RTX
, &XEXP (x
, 1), NULL
,
12325 INDEX_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12326 opnum
, (enum reload_type
) type
);
12327 something_reloaded
= true;
12330 gcc_assert (something_reloaded
);
12337 /* Recognizes RTL expressions that are valid memory addresses for an
12338 instruction. The MODE argument is the machine mode for the MEM
12339 expression that wants to use this address.
It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */
12346 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
12347 rtx addr
, bool strict
)
12349 struct ix86_address parts
;
12350 rtx base
, index
, disp
;
12351 HOST_WIDE_INT scale
;
12353 if (ix86_decompose_address (addr
, &parts
) <= 0)
12354 /* Decomposition failed. */
12358 index
= parts
.index
;
12360 scale
= parts
.scale
;
12362 /* Validate base register. */
12369 else if (GET_CODE (base
) == SUBREG
&& REG_P (SUBREG_REG (base
)))
12370 reg
= SUBREG_REG (base
);
12372 /* Base is not a register. */
12375 if (GET_MODE (base
) != SImode
&& GET_MODE (base
) != DImode
)
12378 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12379 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12380 /* Base is not valid. */
12384 /* Validate index register. */
12391 else if (GET_CODE (index
) == SUBREG
&& REG_P (SUBREG_REG (index
)))
12392 reg
= SUBREG_REG (index
);
12394 /* Index is not a register. */
12397 if (GET_MODE (index
) != SImode
&& GET_MODE (index
) != DImode
)
12400 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12401 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12402 /* Index is not valid. */
12406 /* Index and base should have the same mode. */
12408 && GET_MODE (base
) != GET_MODE (index
))
12411 /* Validate scale factor. */
12415 /* Scale without index. */
12418 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12419 /* Scale is not a valid multiplier. */
12423 /* Validate displacement. */
12426 if (GET_CODE (disp
) == CONST
12427 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12428 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12429 switch (XINT (XEXP (disp
, 0), 1))
/* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
   used.  While the ABI also specifies 32bit relocations, we don't produce
   them at all and use IP-relative addressing instead.  */
12435 case UNSPEC_GOTOFF
:
12436 gcc_assert (flag_pic
);
12438 goto is_legitimate_pic
;
12440 /* 64bit address unspec. */
12443 case UNSPEC_GOTPCREL
:
12445 gcc_assert (flag_pic
);
12446 goto is_legitimate_pic
;
12448 case UNSPEC_GOTTPOFF
:
12449 case UNSPEC_GOTNTPOFF
:
12450 case UNSPEC_INDNTPOFF
:
12451 case UNSPEC_NTPOFF
:
12452 case UNSPEC_DTPOFF
:
12455 case UNSPEC_STACK_CHECK
:
12456 gcc_assert (flag_split_stack
);
12460 /* Invalid address unspec. */
12464 else if (SYMBOLIC_CONST (disp
)
12468 && MACHOPIC_INDIRECT
12469 && !machopic_operand_p (disp
)
12475 if (TARGET_64BIT
&& (index
|| base
))
12477 /* foo@dtpoff(%rX) is ok. */
12478 if (GET_CODE (disp
) != CONST
12479 || GET_CODE (XEXP (disp
, 0)) != PLUS
12480 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12481 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12482 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12483 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12484 /* Non-constant pic memory reference. */
12487 else if ((!TARGET_MACHO
|| flag_pic
)
12488 && ! legitimate_pic_address_disp_p (disp
))
12489 /* Displacement is an invalid pic construct. */
12492 else if (MACHO_DYNAMIC_NO_PIC_P
12493 && !ix86_legitimate_constant_p (Pmode
, disp
))
/* Displacement must be referenced via non_lazy_pointer.  */
/* This code used to verify that a symbolic pic displacement
   includes the pic_offset_table_rtx register.

   While this is a good idea, unfortunately these constructs may
   be created by the "adds using lea" optimization for incorrect
   code such as taking the address of a global and adding an index,
   e.g. *(&a + i).

   This code is nonsensical, but results in addressing the
   GOT table with a pic_offset_table_rtx base.  We can't
   just refuse it easily, since it gets matched by the
   "addsi3" pattern, which later gets split to lea in the
   case the output register differs from the input.  While this
   could be handled by a separate addsi pattern for this case
   that never results in lea, disabling this test seems to be the
   easier and correct fix for the crash.  */
12520 else if (GET_CODE (disp
) != LABEL_REF
12521 && !CONST_INT_P (disp
)
12522 && (GET_CODE (disp
) != CONST
12523 || !ix86_legitimate_constant_p (Pmode
, disp
))
12524 && (GET_CODE (disp
) != SYMBOL_REF
12525 || !ix86_legitimate_constant_p (Pmode
, disp
)))
12526 /* Displacement is not constant. */
12528 else if (TARGET_64BIT
12529 && !x86_64_immediate_operand (disp
, VOIDmode
))
12530 /* Displacement is out of range. */
12534 /* Everything looks valid. */
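/* Illustrative aside (not from the original sources): the displacement
   range restriction applied above follows from the instruction encoding -
   an x86-64 displacement is a sign-extended 32-bit immediate.  A
   standalone sketch of that check, with a hypothetical helper name:  */

static int
example_disp_fits_imm32 (long long disp)
{
  /* Usable displacements lie in [-2^31, 2^31).  */
  return disp >= -(1LL << 31) && disp < (1LL << 31);
}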
/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}
/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
12557 /* Return a legitimate reference for ORIG (an address) using the
12558 register REG. If REG is 0, a new pseudo is generated.
12560 There are two types of references that must be handled:
12562 1. Global data references must load the address from the GOT, via
the PIC reg.  An insn is emitted to do this load, and the reg is
   returned.
12566 2. Static data references, constant pool addresses, and code labels
12567 compute the address as an offset from the GOT, whose base is in
12568 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12569 differentiate them from global data objects. The returned
12570 address is the PIC reg + an unspec constant.
12572 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12573 reg also appears in the address. */
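/* Illustrative aside (not from the original sources): for 32-bit ELF PIC
   code the two cases above typically materialize as

     movl  foo@GOT(%ebx), %eax      # case 1: load address of global data
     leal  bar@GOTOFF(%ebx), %eax   # case 2: address of local/static data

   where %ebx holds the GOT base (pic_offset_table_rtx).  */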
12576 legitimize_pic_address (rtx orig
, rtx reg
)
12579 rtx new_rtx
= orig
;
12583 if (TARGET_MACHO
&& !TARGET_64BIT
)
12586 reg
= gen_reg_rtx (Pmode
);
12587 /* Use the generic Mach-O PIC machinery. */
12588 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12592 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12594 else if (TARGET_64BIT
12595 && ix86_cmodel
!= CM_SMALL_PIC
12596 && gotoff_operand (addr
, Pmode
))
12599 /* This symbol may be referenced via a displacement from the PIC
12600 base address (@GOTOFF). */
12602 if (reload_in_progress
)
12603 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12604 if (GET_CODE (addr
) == CONST
)
12605 addr
= XEXP (addr
, 0);
12606 if (GET_CODE (addr
) == PLUS
)
12608 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12610 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12613 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12614 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12616 tmpreg
= gen_reg_rtx (Pmode
);
12619 emit_move_insn (tmpreg
, new_rtx
);
12623 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12624 tmpreg
, 1, OPTAB_DIRECT
);
12627 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
12629 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
12631 /* This symbol may be referenced via a displacement from the PIC
12632 base address (@GOTOFF). */
12634 if (reload_in_progress
)
12635 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12636 if (GET_CODE (addr
) == CONST
)
12637 addr
= XEXP (addr
, 0);
12638 if (GET_CODE (addr
) == PLUS
)
12640 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12642 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12645 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12646 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12647 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12651 emit_move_insn (reg
, new_rtx
);
12655 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
12656 /* We can't use @GOTOFF for text labels on VxWorks;
12657 see gotoff_operand. */
12658 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
12660 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12662 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
12663 return legitimize_dllimport_symbol (addr
, true);
12664 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
12665 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
12666 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
12668 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), true);
12669 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
/* For x64 PE-COFF there is no GOT table, so we use the address
	 directly.  */
12675 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
12677 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
12678 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12681 reg
= gen_reg_rtx (Pmode
);
12682 emit_move_insn (reg
, new_rtx
);
12685 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
12687 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
12688 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12689 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12690 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12693 reg
= gen_reg_rtx (Pmode
);
/* Use gen_movsi directly; otherwise the address is loaded
   into a register for CSE.  We don't want to CSE these addresses;
   instead we CSE addresses from the GOT table, so skip this.  */
12697 emit_insn (gen_movsi (reg
, new_rtx
));
12702 /* This symbol must be referenced via a load from the
12703 Global Offset Table (@GOT). */
12705 if (reload_in_progress
)
12706 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12707 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
12708 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12710 new_rtx
= force_reg (Pmode
, new_rtx
);
12711 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12712 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12713 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12716 reg
= gen_reg_rtx (Pmode
);
12717 emit_move_insn (reg
, new_rtx
);
12723 if (CONST_INT_P (addr
)
12724 && !x86_64_immediate_operand (addr
, VOIDmode
))
12728 emit_move_insn (reg
, addr
);
12732 new_rtx
= force_reg (Pmode
, addr
);
12734 else if (GET_CODE (addr
) == CONST
)
12736 addr
= XEXP (addr
, 0);
12738 /* We must match stuff we generate before. Assume the only
12739 unspecs that can get here are ours. Not that we could do
12740 anything with them anyway.... */
12741 if (GET_CODE (addr
) == UNSPEC
12742 || (GET_CODE (addr
) == PLUS
12743 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
12745 gcc_assert (GET_CODE (addr
) == PLUS
);
12747 if (GET_CODE (addr
) == PLUS
)
12749 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
12751 /* Check first to see if this is a constant offset from a @GOTOFF
12752 symbol reference. */
12753 if (gotoff_operand (op0
, Pmode
)
12754 && CONST_INT_P (op1
))
12758 if (reload_in_progress
)
12759 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12760 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
12762 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
12763 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12764 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12768 emit_move_insn (reg
, new_rtx
);
12774 if (INTVAL (op1
) < -16*1024*1024
12775 || INTVAL (op1
) >= 16*1024*1024)
12777 if (!x86_64_immediate_operand (op1
, Pmode
))
12778 op1
= force_reg (Pmode
, op1
);
12779 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
12785 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
12786 new_rtx
= legitimize_pic_address (XEXP (addr
, 1),
12787 base
== reg
? NULL_RTX
: reg
);
12789 if (CONST_INT_P (new_rtx
))
12790 new_rtx
= plus_constant (Pmode
, base
, INTVAL (new_rtx
));
12793 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
12795 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
12796 new_rtx
= XEXP (new_rtx
, 1);
12798 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != tp_mode)
    {
      gcc_assert (GET_MODE (tp) == SImode);
      gcc_assert (tp_mode == DImode);

      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
    }

  if (to_reg)
    tp = copy_to_mode_reg (tp_mode, tp);

  return tp;
}
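/* Illustrative aside (not from the original sources): on GNU/Linux the
   thread pointer lives in a segment base register, so the UNSPEC_TP
   access above typically assembles to

     movl  %gs:0, %eax     # 32-bit
     movq  %fs:0, %rax     # 64-bit

   which is also why TARGET_TLS_DIRECT_SEG_REFS can fold TLS offsets
   straight into %fs:/%gs: addresses elsewhere in this file.  */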
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  return ix86_tls_symbol;
}
/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

static rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
	= gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
12865 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12866 false if we expect this to be used for a memory address and true if
12867 we expect to load the address into a register. */
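/* Illustrative aside (not from the original sources): the switch below
   handles the four ELF TLS dialects -

     TLS_MODEL_GLOBAL_DYNAMIC   call __tls_get_addr for any module/symbol
     TLS_MODEL_LOCAL_DYNAMIC    one __tls_get_addr call per module, then
                                constant DTPOFF offsets per symbol
     TLS_MODEL_INITIAL_EXEC     load the TP offset from the GOT
     TLS_MODEL_LOCAL_EXEC       constant offset from the thread pointer

   each of which trades generality for fewer runtime relocations.  */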
12870 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
12872 rtx dest
, base
, off
;
12873 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
12874 enum machine_mode tp_mode
= Pmode
;
12879 case TLS_MODEL_GLOBAL_DYNAMIC
:
12880 dest
= gen_reg_rtx (Pmode
);
12885 pic
= pic_offset_table_rtx
;
12888 pic
= gen_reg_rtx (Pmode
);
12889 emit_insn (gen_set_got (pic
));
12893 if (TARGET_GNU2_TLS
)
12896 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
12898 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
12900 tp
= get_thread_pointer (Pmode
, true);
12901 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
12903 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12907 rtx caddr
= ix86_tls_get_addr ();
12911 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
;
12914 emit_call_insn (ix86_gen_tls_global_dynamic_64 (rax
, x
,
12916 insns
= get_insns ();
12919 RTL_CONST_CALL_P (insns
) = 1;
12920 emit_libcall_block (insns
, dest
, rax
, x
);
12923 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
12927 case TLS_MODEL_LOCAL_DYNAMIC
:
12928 base
= gen_reg_rtx (Pmode
);
12933 pic
= pic_offset_table_rtx
;
12936 pic
= gen_reg_rtx (Pmode
);
12937 emit_insn (gen_set_got (pic
));
12941 if (TARGET_GNU2_TLS
)
12943 rtx tmp
= ix86_tls_module_base ();
12946 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
12948 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
12950 tp
= get_thread_pointer (Pmode
, true);
12951 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
12952 gen_rtx_MINUS (Pmode
, tmp
, tp
));
12956 rtx caddr
= ix86_tls_get_addr ();
12960 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
, eqv
;
12963 emit_call_insn (ix86_gen_tls_local_dynamic_base_64 (rax
,
12965 insns
= get_insns ();
12968 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12969 share the LD_BASE result with other LD model accesses. */
12970 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
12971 UNSPEC_TLS_LD_BASE
);
12973 RTL_CONST_CALL_P (insns
) = 1;
12974 emit_libcall_block (insns
, base
, rax
, eqv
);
12977 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
12980 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
12981 off
= gen_rtx_CONST (Pmode
, off
);
12983 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
12985 if (TARGET_GNU2_TLS
)
12987 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
12989 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12993 case TLS_MODEL_INITIAL_EXEC
:
12996 if (TARGET_SUN_TLS
&& !TARGET_X32
)
12998 /* The Sun linker took the AMD64 TLS spec literally
12999 and can only handle %rax as destination of the
13000 initial executable code sequence. */
13002 dest
= gen_reg_rtx (DImode
);
13003 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
13007 /* Generate DImode references to avoid %fs:(%reg32)
13008 problems and linker IE->LE relaxation bug. */
13011 type
= UNSPEC_GOTNTPOFF
;
13015 if (reload_in_progress
)
13016 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13017 pic
= pic_offset_table_rtx
;
13018 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
13020 else if (!TARGET_ANY_GNU_TLS
)
13022 pic
= gen_reg_rtx (Pmode
);
13023 emit_insn (gen_set_got (pic
));
13024 type
= UNSPEC_GOTTPOFF
;
13029 type
= UNSPEC_INDNTPOFF
;
13032 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
13033 off
= gen_rtx_CONST (tp_mode
, off
);
13035 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
13036 off
= gen_const_mem (tp_mode
, off
);
13037 set_mem_alias_set (off
, ix86_GOT_alias_set ());
13039 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13041 base
= get_thread_pointer (tp_mode
,
13042 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13043 off
= force_reg (tp_mode
, off
);
13044 return gen_rtx_PLUS (tp_mode
, base
, off
);
13048 base
= get_thread_pointer (Pmode
, true);
13049 dest
= gen_reg_rtx (Pmode
);
13050 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13054 case TLS_MODEL_LOCAL_EXEC
:
13055 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
13056 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13057 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
13058 off
= gen_rtx_CONST (Pmode
, off
);
13060 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13062 base
= get_thread_pointer (Pmode
,
13063 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13064 return gen_rtx_PLUS (Pmode
, base
, off
);
13068 base
= get_thread_pointer (Pmode
, true);
13069 dest
= gen_reg_rtx (Pmode
);
13070 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13075 gcc_unreachable ();
13081 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13084 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
13085 htab_t dllimport_map
;
13088 get_dllimport_decl (tree decl
)
13090 struct tree_map
*h
, in
;
13093 const char *prefix
;
13094 size_t namelen
, prefixlen
;
13099 if (!dllimport_map
)
13100 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
13102 in
.hash
= htab_hash_pointer (decl
);
13103 in
.base
.from
= decl
;
13104 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
13105 h
= (struct tree_map
*) *loc
;
13109 *loc
= h
= ggc_alloc_tree_map ();
13111 h
->base
.from
= decl
;
13112 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
13113 VAR_DECL
, NULL
, ptr_type_node
);
13114 DECL_ARTIFICIAL (to
) = 1;
13115 DECL_IGNORED_P (to
) = 1;
13116 DECL_EXTERNAL (to
) = 1;
13117 TREE_READONLY (to
) = 1;
13119 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
13120 name
= targetm
.strip_name_encoding (name
);
13121 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
13122 ? "*__imp_" : "*__imp__";
13123 namelen
= strlen (name
);
13124 prefixlen
= strlen (prefix
);
13125 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
13126 memcpy (imp_name
, prefix
, prefixlen
);
13127 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
13129 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
13130 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
13131 SET_SYMBOL_REF_DECL (rtl
, to
);
13132 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
13134 rtl
= gen_const_mem (Pmode
, rtl
);
13135 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
13137 SET_DECL_RTL (to
, rtl
);
13138 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
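/* Illustrative aside (not from the original sources): the name mangling
   above simply glues an "__imp_" style prefix onto the stripped symbol
   name.  A minimal standalone sketch, using a hypothetical helper name
   and plain malloc instead of alloca/ggc allocation:  */

#include <string.h>
#include <stdlib.h>

static char *
example_make_imp_name (const char *name, int fastcall_or_no_prefix)
{
  const char *prefix = fastcall_or_no_prefix ? "*__imp_" : "*__imp__";
  size_t prefixlen = strlen (prefix);
  size_t namelen = strlen (name);
  char *imp_name = (char *) malloc (prefixlen + namelen + 1);

  if (imp_name)
    {
      memcpy (imp_name, prefix, prefixlen);
      memcpy (imp_name + prefixlen, name, namelen + 1);
    }
  return imp_name;
}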
13143 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13144 true if we require the result be a register. */
13147 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
13152 gcc_assert (SYMBOL_REF_DECL (symbol
));
13153 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
13155 x
= DECL_RTL (imp_decl
);
13157 x
= force_reg (Pmode
, x
);
13161 /* Try machine-dependent ways of modifying an illegitimate address
13162 to be legitimate. If we find one, return the new, valid address.
13163 This macro is used in only one place: `memory_address' in explow.c.
13165 OLDX is the address as it was before break_out_memory_refs was called.
13166 In some cases it is useful to look at this to decide what needs to be done.
13168 It is always safe for this macro to do nothing. It exists to recognize
13169 opportunities to optimize the output.
13171 For the 80386, we handle X+REG by loading X into a register R and
13172 using R+REG. R will go in a general reg and indexing will be used.
13173 However, if REG is a broken-out memory address or multiplication,
13174 nothing needs to be done because REG can certainly go in a general reg.
13176 When -fpic is used, special handling is needed for symbolic references.
13177 See comments by legitimize_pic_address in i386.c for details. */
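/* Illustrative aside (not from the original sources): one of the
   canonicalizations performed below rewrites a small left shift into a
   multiply so it can become the index*scale part of an address,
   using x << k == x * (1 << k) for k = 0..3.  A standalone sketch with a
   hypothetical helper name:  */

static long
example_shift_to_scale (long x, int log)
{
  /* Valid only for log 0..3, matching the encodable scales 1, 2, 4, 8.  */
  return x * (1L << log);
}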
13180 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
13181 enum machine_mode mode
)
13186 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
13188 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
13189 if (GET_CODE (x
) == CONST
13190 && GET_CODE (XEXP (x
, 0)) == PLUS
13191 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13192 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
13194 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
13195 (enum tls_model
) log
, false);
13196 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13199 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13201 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
13202 return legitimize_dllimport_symbol (x
, true);
13203 if (GET_CODE (x
) == CONST
13204 && GET_CODE (XEXP (x
, 0)) == PLUS
13205 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13206 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
13208 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
13209 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13213 if (flag_pic
&& SYMBOLIC_CONST (x
))
13214 return legitimize_pic_address (x
, 0);
13217 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
13218 return machopic_indirect_data_reference (x
, 0);
13221 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13222 if (GET_CODE (x
) == ASHIFT
13223 && CONST_INT_P (XEXP (x
, 1))
13224 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
13227 log
= INTVAL (XEXP (x
, 1));
13228 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
13229 GEN_INT (1 << log
));
13232 if (GET_CODE (x
) == PLUS
)
13234 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13236 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
13237 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13238 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
13241 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
13242 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
13243 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
13244 GEN_INT (1 << log
));
13247 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13248 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13249 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13252 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13253 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13254 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13255 GEN_INT (1 << log
));
13258 /* Put multiply first if it isn't already. */
13259 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13261 rtx tmp
= XEXP (x
, 0);
13262 XEXP (x
, 0) = XEXP (x
, 1);
13267 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13268 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13269 created by virtual register instantiation, register elimination, and
13270 similar optimizations. */
13271 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
13274 x
= gen_rtx_PLUS (Pmode
,
13275 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
13276 XEXP (XEXP (x
, 1), 0)),
13277 XEXP (XEXP (x
, 1), 1));
13281 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13282 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13283 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
13284 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
13285 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
13286 && CONSTANT_P (XEXP (x
, 1)))
13289 rtx other
= NULL_RTX
;
13291 if (CONST_INT_P (XEXP (x
, 1)))
13293 constant
= XEXP (x
, 1);
13294 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13296 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
13298 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13299 other
= XEXP (x
, 1);
13307 x
= gen_rtx_PLUS (Pmode
,
13308 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
13309 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
13310 plus_constant (Pmode
, other
,
13311 INTVAL (constant
)));
13315 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13318 if (GET_CODE (XEXP (x
, 0)) == MULT
)
13321 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
13324 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13327 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
13331 && REG_P (XEXP (x
, 1))
13332 && REG_P (XEXP (x
, 0)))
13335 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
13338 x
= legitimize_pic_address (x
, 0);
13341 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13344 if (REG_P (XEXP (x
, 0)))
13346 rtx temp
= gen_reg_rtx (Pmode
);
13347 rtx val
= force_operand (XEXP (x
, 1), temp
);
13350 if (GET_MODE (val
) != Pmode
)
13351 val
= convert_to_mode (Pmode
, val
, 1);
13352 emit_move_insn (temp
, val
);
13355 XEXP (x
, 1) = temp
;
13359 else if (REG_P (XEXP (x
, 1)))
13361 rtx temp
= gen_reg_rtx (Pmode
);
13362 rtx val
= force_operand (XEXP (x
, 0), temp
);
13365 if (GET_MODE (val
) != Pmode
)
13366 val
= convert_to_mode (Pmode
, val
, 1);
13367 emit_move_insn (temp
, val
);
13370 XEXP (x
, 0) = temp
;
13378 /* Print an integer constant expression in assembler syntax. Addition
13379 and subtraction are the only arithmetic that may appear in these
13380 expressions. FILE is the stdio stream to write to, X is the rtx, and
13381 CODE is the operand print code from the output string. */
13384 output_pic_addr_const (FILE *file
, rtx x
, int code
)
13388 switch (GET_CODE (x
))
13391 gcc_assert (flag_pic
);
13396 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
13397 output_addr_const (file
, x
);
13400 const char *name
= XSTR (x
, 0);
13402 /* Mark the decl as referenced so that cgraph will
13403 output the function. */
13404 if (SYMBOL_REF_DECL (x
))
13405 mark_decl_referenced (SYMBOL_REF_DECL (x
));
13408 if (MACHOPIC_INDIRECT
13409 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13410 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13412 assemble_name (file
, name
);
13414 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
13415 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
13416 fputs ("@PLT", file
);
13423 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
13424 assemble_name (asm_out_file
, buf
);
13428 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13432 /* This used to output parentheses around the expression,
13433 but that does not work on the 386 (either ATT or BSD assembler). */
13434 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13438 if (GET_MODE (x
) == VOIDmode
)
13440 /* We can use %d if the number is <32 bits and positive. */
13441 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13442 fprintf (file
, "0x%lx%08lx",
13443 (unsigned long) CONST_DOUBLE_HIGH (x
),
13444 (unsigned long) CONST_DOUBLE_LOW (x
));
13446 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13449 /* We can't handle floating point constants;
13450 TARGET_PRINT_OPERAND must handle them. */
13451 output_operand_lossage ("floating constant misused");
13455 /* Some assemblers need integer constants to appear first. */
13456 if (CONST_INT_P (XEXP (x
, 0)))
13458 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13460 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13464 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
13465 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13467 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13473 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13474 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13476 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13478 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
13482 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13484 bool f
= i386_asm_output_addr_const_extra (file
, x
);
13489 gcc_assert (XVECLEN (x
, 0) == 1);
13490 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13491 switch (XINT (x
, 1))
13494 fputs ("@GOT", file
);
13496 case UNSPEC_GOTOFF
:
13497 fputs ("@GOTOFF", file
);
13499 case UNSPEC_PLTOFF
:
13500 fputs ("@PLTOFF", file
);
13503 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13504 "(%rip)" : "[rip]", file
);
13506 case UNSPEC_GOTPCREL
:
13507 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13508 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13510 case UNSPEC_GOTTPOFF
:
13511 /* FIXME: This might be @TPOFF in Sun ld too. */
13512 fputs ("@gottpoff", file
);
13515 fputs ("@tpoff", file
);
13517 case UNSPEC_NTPOFF
:
13519 fputs ("@tpoff", file
);
13521 fputs ("@ntpoff", file
);
13523 case UNSPEC_DTPOFF
:
13524 fputs ("@dtpoff", file
);
13526 case UNSPEC_GOTNTPOFF
:
13528 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13529 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13531 fputs ("@gotntpoff", file
);
13533 case UNSPEC_INDNTPOFF
:
13534 fputs ("@indntpoff", file
);
13537 case UNSPEC_MACHOPIC_OFFSET
:
13539 machopic_output_function_base_name (file
);
13543 output_operand_lossage ("invalid UNSPEC as operand");
13549 output_operand_lossage ("invalid expression as operand");
13553 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13554 We need to emit DTP-relative relocations. */
13556 static void ATTRIBUTE_UNUSED
13557 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
13559 fputs (ASM_LONG
, file
);
13560 output_addr_const (file
, x
);
13561 fputs ("@dtpoff", file
);
13567 fputs (", 0", file
);
13570 gcc_unreachable ();
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else
    return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
13588 /* Helper function for ix86_delegitimize_address.
13589 Attempt to delegitimize TLS local-exec accesses. */
13592 ix86_delegitimize_tls_address (rtx orig_x
)
13594 rtx x
= orig_x
, unspec
;
13595 struct ix86_address addr
;
13597 if (!TARGET_TLS_DIRECT_SEG_REFS
)
13601 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
13603 if (ix86_decompose_address (x
, &addr
) == 0
13604 || addr
.seg
!= (TARGET_64BIT
? SEG_FS
: SEG_GS
)
13605 || addr
.disp
== NULL_RTX
13606 || GET_CODE (addr
.disp
) != CONST
)
13608 unspec
= XEXP (addr
.disp
, 0);
13609 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
13610 unspec
= XEXP (unspec
, 0);
13611 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
13613 x
= XVECEXP (unspec
, 0, 0);
13614 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
13615 if (unspec
!= XEXP (addr
.disp
, 0))
13616 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
13619 rtx idx
= addr
.index
;
13620 if (addr
.scale
!= 1)
13621 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
13622 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
13625 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
13626 if (MEM_P (orig_x
))
13627 x
= replace_equiv_address_nv (orig_x
, x
);
13631 /* In the name of slightly smaller debug output, and to cater to
13632 general assembler lossage, recognize PIC+GOTOFF and turn it back
13633 into a direct symbol reference.
13635 On Darwin, this is necessary to avoid a crash, because Darwin
13636 has a different PIC label for each routine but the DWARF debugging
13637 information is not associated with any particular routine, so it's
13638 necessary to remove references to the PIC label from RTL stored by
13639 the DWARF output code. */
13642 ix86_delegitimize_address (rtx x
)
13644 rtx orig_x
= delegitimize_mem_from_attrs (x
);
13645 /* addend is NULL or some rtx if x is something+GOTOFF where
13646 something doesn't include the PIC register. */
13647 rtx addend
= NULL_RTX
;
13648 /* reg_addend is NULL or a multiple of some register. */
13649 rtx reg_addend
= NULL_RTX
;
13650 /* const_addend is NULL or a const_int. */
13651 rtx const_addend
= NULL_RTX
;
13652 /* This is the result, or NULL. */
13653 rtx result
= NULL_RTX
;
13662 if (GET_CODE (x
) == CONST
13663 && GET_CODE (XEXP (x
, 0)) == PLUS
13664 && GET_MODE (XEXP (x
, 0)) == Pmode
13665 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13666 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
13667 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
13669 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
13670 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
13671 if (MEM_P (orig_x
))
13672 x
= replace_equiv_address_nv (orig_x
, x
);
13675 if (GET_CODE (x
) != CONST
13676 || GET_CODE (XEXP (x
, 0)) != UNSPEC
13677 || (XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
13678 && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
)
13679 || (!MEM_P (orig_x
) && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
))
13680 return ix86_delegitimize_tls_address (orig_x
);
13681 x
= XVECEXP (XEXP (x
, 0), 0, 0);
13682 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
13684 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
13692 if (GET_CODE (x
) != PLUS
13693 || GET_CODE (XEXP (x
, 1)) != CONST
)
13694 return ix86_delegitimize_tls_address (orig_x
);
13696 if (ix86_pic_register_p (XEXP (x
, 0)))
13697 /* %ebx + GOT/GOTOFF */
13699 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
13701 /* %ebx + %reg * scale + GOT/GOTOFF */
13702 reg_addend
= XEXP (x
, 0);
13703 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
13704 reg_addend
= XEXP (reg_addend
, 1);
13705 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
13706 reg_addend
= XEXP (reg_addend
, 0);
13709 reg_addend
= NULL_RTX
;
13710 addend
= XEXP (x
, 0);
13714 addend
= XEXP (x
, 0);
13716 x
= XEXP (XEXP (x
, 1), 0);
13717 if (GET_CODE (x
) == PLUS
13718 && CONST_INT_P (XEXP (x
, 1)))
13720 const_addend
= XEXP (x
, 1);
13724 if (GET_CODE (x
) == UNSPEC
13725 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
13726 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
13727 result
= XVECEXP (x
, 0, 0);
13729 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
13730 && !MEM_P (orig_x
))
13731 result
= XVECEXP (x
, 0, 0);
13734 return ix86_delegitimize_tls_address (orig_x
);
13737 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
13739 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
13742 /* If the rest of original X doesn't involve the PIC register, add
13743 addend and subtract pic_offset_table_rtx. This can happen e.g.
13745 leal (%ebx, %ecx, 4), %ecx
13747 movl foo@GOTOFF(%ecx), %edx
13748 in which case we return (%ecx - %ebx) + foo. */
13749 if (pic_offset_table_rtx
)
13750 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
13751 pic_offset_table_rtx
),
13756 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
13758 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
13759 if (result
== NULL_RTX
)
13765 /* If X is a machine specific address (i.e. a symbol or label being
13766 referenced as a displacement from the GOT implemented using an
13767 UNSPEC), then return the base term. Otherwise return X. */
13770 ix86_find_base_term (rtx x
)
13776 if (GET_CODE (x
) != CONST
)
13778 term
= XEXP (x
, 0);
13779 if (GET_CODE (term
) == PLUS
13780 && (CONST_INT_P (XEXP (term
, 1))
13781 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
13782 term
= XEXP (term
, 0);
13783 if (GET_CODE (term
) != UNSPEC
13784 || (XINT (term
, 1) != UNSPEC_GOTPCREL
13785 && XINT (term
, 1) != UNSPEC_PCREL
))
13788 return XVECEXP (term
, 0, 0);
13791 return ix86_delegitimize_address (x
);
13795 put_condition_code (enum rtx_code code
, enum machine_mode mode
, bool reverse
,
13796 bool fp
, FILE *file
)
13798 const char *suffix
;
13800 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
13802 code
= ix86_fp_compare_code_to_integer (code
);
13806 code
= reverse_condition (code
);
13857 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
13861 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13862 Those same assemblers have the same but opposite lossage on cmov. */
13863 if (mode
== CCmode
)
13864 suffix
= fp
? "nbe" : "a";
13865 else if (mode
== CCCmode
)
13868 gcc_unreachable ();
13884 gcc_unreachable ();
13888 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13905 gcc_unreachable ();
13909 /* ??? As above. */
13910 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13911 suffix
= fp
? "nb" : "ae";
13914 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
13918 /* ??? As above. */
13919 if (mode
== CCmode
)
13921 else if (mode
== CCCmode
)
13922 suffix
= fp
? "nb" : "ae";
13924 gcc_unreachable ();
13927 suffix
= fp
? "u" : "p";
13930 suffix
= fp
? "nu" : "np";
13933 gcc_unreachable ();
13935 fputs (suffix
, file
);
13938 /* Print the name of register X to FILE based on its machine mode and number.
13939 If CODE is 'w', pretend the mode is HImode.
13940 If CODE is 'b', pretend the mode is QImode.
13941 If CODE is 'k', pretend the mode is SImode.
13942 If CODE is 'q', pretend the mode is DImode.
13943 If CODE is 'x', pretend the mode is V4SFmode.
13944 If CODE is 't', pretend the mode is V8SFmode.
13945 If CODE is 'h', pretend the reg is the 'high' byte register.
13946 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13947 If CODE is 'd', duplicate the operand for AVX instruction.
13951 print_reg (rtx x
, int code
, FILE *file
)
13954 bool duplicated
= code
== 'd' && TARGET_AVX
;
13956 gcc_assert (x
== pc_rtx
13957 || (REGNO (x
) != ARG_POINTER_REGNUM
13958 && REGNO (x
) != FRAME_POINTER_REGNUM
13959 && REGNO (x
) != FLAGS_REG
13960 && REGNO (x
) != FPSR_REG
13961 && REGNO (x
) != FPCR_REG
));
13963 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13968 gcc_assert (TARGET_64BIT
);
13969 fputs ("rip", file
);
13973 if (code
== 'w' || MMX_REG_P (x
))
13975 else if (code
== 'b')
13977 else if (code
== 'k')
13979 else if (code
== 'q')
13981 else if (code
== 'y')
13983 else if (code
== 'h')
13985 else if (code
== 'x')
13987 else if (code
== 't')
13990 code
= GET_MODE_SIZE (GET_MODE (x
));
/* Irritatingly, AMD extended registers use a different naming convention
	 from the normal registers: "r%d[bwd]".  */
13994 if (REX_INT_REG_P (x
))
13996 gcc_assert (TARGET_64BIT
);
13998 fprint_ul (file
, REGNO (x
) - FIRST_REX_INT_REG
+ 8);
14002 error ("extended registers have no high halves");
14017 error ("unsupported operand size for extended register");
14027 if (STACK_TOP_P (x
))
14036 if (! ANY_FP_REG_P (x
))
14037 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
14042 reg
= hi_reg_name
[REGNO (x
)];
14045 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
14047 reg
= qi_reg_name
[REGNO (x
)];
14050 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
14052 reg
= qi_high_reg_name
[REGNO (x
)];
14057 gcc_assert (!duplicated
);
14059 fputs (hi_reg_name
[REGNO (x
)] + 1, file
);
14064 gcc_unreachable ();
14070 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14071 fprintf (file
, ", %%%s", reg
);
14073 fprintf (file
, ", %s", reg
);
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
14086 if (GET_CODE (x
) == SYMBOL_REF
14087 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
14089 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
14096 static const char *
14097 get_some_local_dynamic_name (void)
14101 if (cfun
->machine
->some_ld_name
)
14102 return cfun
->machine
->some_ld_name
;
14104 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14105 if (NONDEBUG_INSN_P (insn
)
14106 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
14107 return cfun
->machine
->some_ld_name
;
14112 /* Meaning of CODE:
14113 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14114 C -- print opcode suffix for set/cmov insn.
14115 c -- like C, but print reversed condition
14116 F,f -- likewise, but for floating-point.
14117 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14119 R -- print the prefix for register names.
14120 z -- print the opcode suffix for the size of the current operand.
14121 Z -- likewise, with special suffixes for x87 instructions.
14122 * -- print a star (in certain assembler syntax)
14123 A -- print an absolute memory reference.
14124 E -- print address with DImode register names if TARGET_64BIT.
14125 w -- print the operand as if it's a "word" (HImode) even if it isn't.
s -- print a shift double count, followed by the assembler's argument
14128 b -- print the QImode name of the register for the indicated operand.
14129 %b0 would print %al if operands[0] is reg 0.
14130 w -- likewise, print the HImode name of the register.
14131 k -- likewise, print the SImode name of the register.
14132 q -- likewise, print the DImode name of the register.
14133 x -- likewise, print the V4SFmode name of the register.
14134 t -- likewise, print the V8SFmode name of the register.
14135 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14136 y -- print "st(0)" instead of "st" as a register.
14137 d -- print duplicated register operand for AVX instruction.
14138 D -- print condition for SSE cmp instruction.
14139 P -- if PIC, print an @PLT suffix.
14140 p -- print raw symbol name.
14141 X -- don't print any sort of PIC '@' suffix for a symbol.
14142 & -- print some in-use local-dynamic symbol name.
14143 H -- print a memory address offset by 8; used for sse high-parts
14144 Y -- print condition for XOP pcom* instruction.
14145 + -- print a branch hint as 'cs' or 'ds' prefix
14146 ; -- print a semicolon (after prefixes due to bug in older gas).
14147 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14148 @ -- print a segment register of thread base pointer load
^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
*/
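/* Illustrative aside (not from the original sources): for operands[0]
   being the AX register, the size codes documented above print

     %b0 -> %al    %w0 -> %ax    %k0 -> %eax    %q0 -> %rax    %h0 -> %ah

   so a single pattern can name whichever width of the register the
   instruction actually needs.  */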
14153 ix86_print_operand (FILE *file
, rtx x
, int code
)
14160 switch (ASSEMBLER_DIALECT
)
14167 /* Intel syntax. For absolute addresses, registers should not
14168 be surrounded by braces. */
14172 ix86_print_operand (file
, x
, 0);
14179 gcc_unreachable ();
14182 ix86_print_operand (file
, x
, 0);
14186 /* Wrap address in an UNSPEC to declare special handling. */
14188 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
14190 output_address (x
);
14194 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14199 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14204 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14209 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14214 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14219 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14224 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14225 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14228 switch (GET_MODE_SIZE (GET_MODE (x
)))
14243 output_operand_lossage
14244 ("invalid operand size for operand code 'O'");
14253 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14255 /* Opcodes don't get size suffixes if using Intel opcodes. */
14256 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14259 switch (GET_MODE_SIZE (GET_MODE (x
)))
14278 output_operand_lossage
14279 ("invalid operand size for operand code 'z'");
14284 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14286 (0, "non-integer operand used with operand code 'z'");
14290 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14291 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14294 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14296 switch (GET_MODE_SIZE (GET_MODE (x
)))
14299 #ifdef HAVE_AS_IX86_FILDS
14309 #ifdef HAVE_AS_IX86_FILDQ
14312 fputs ("ll", file
);
14320 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14322 /* 387 opcodes don't get size suffixes
14323 if the operands are registers. */
14324 if (STACK_REG_P (x
))
14327 switch (GET_MODE_SIZE (GET_MODE (x
)))
14348 output_operand_lossage
14349 ("invalid operand type used with operand code 'Z'");
14353 output_operand_lossage
14354 ("invalid operand size for operand code 'Z'");
14372 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14374 ix86_print_operand (file
, x
, 0);
14375 fputs (", ", file
);
14380 switch (GET_CODE (x
))
14383 fputs ("neq", file
);
14386 fputs ("eq", file
);
14390 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14394 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14398 fputs ("le", file
);
14402 fputs ("lt", file
);
14405 fputs ("unord", file
);
14408 fputs ("ord", file
);
14411 fputs ("ueq", file
);
14414 fputs ("nlt", file
);
14417 fputs ("nle", file
);
14420 fputs ("ule", file
);
14423 fputs ("ult", file
);
14426 fputs ("une", file
);
14429 output_operand_lossage ("operand is not a condition code, "
14430 "invalid operand code 'Y'");
/* Little bit of braindamage here.  The SSE compare instructions
	 use completely different names for the comparisons than the
	 fp conditional moves do.  */
14439 switch (GET_CODE (x
))
14444 fputs ("eq_us", file
);
14448 fputs ("eq", file
);
14453 fputs ("nge", file
);
14457 fputs ("lt", file
);
14462 fputs ("ngt", file
);
14466 fputs ("le", file
);
14469 fputs ("unord", file
);
14474 fputs ("neq_oq", file
);
14478 fputs ("neq", file
);
14483 fputs ("ge", file
);
14487 fputs ("nlt", file
);
14492 fputs ("gt", file
);
14496 fputs ("nle", file
);
14499 fputs ("ord", file
);
14502 output_operand_lossage ("operand is not a condition code, "
14503 "invalid operand code 'D'");
14510 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14511 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14517 if (!COMPARISON_P (x
))
14519 output_operand_lossage ("operand is not a condition code, "
14520 "invalid operand code '%c'", code
);
14523 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
14524 code
== 'c' || code
== 'f',
14525 code
== 'F' || code
== 'f',
14530 if (!offsettable_memref_p (x
))
14532 output_operand_lossage ("operand is not an offsettable memory "
14533 "reference, invalid operand code 'H'");
	  /* It doesn't actually matter what mode we use here, as we're
	     only going to use this for printing.  */
	  x = adjust_address_nv (x, DImode, 8);
	  gcc_assert (CONST_INT_P (x));
	  if (INTVAL (x) & IX86_HLE_ACQUIRE)
#ifdef HAVE_AS_IX86_HLE
	    fputs ("xacquire ", file);
	    fputs ("\n" ASM_BYTE "0xf2\n\t", file);
	  else if (INTVAL (x) & IX86_HLE_RELEASE)
#ifdef HAVE_AS_IX86_HLE
	    fputs ("xrelease ", file);
	    fputs ("\n" ASM_BYTE "0xf3\n\t", file);
	  /* We do not want to print value of the operand.  */
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    const char *name = get_some_local_dynamic_name ();
	      output_operand_lossage ("'%%&' used without any "
				      "local dynamic TLS references");
	    assemble_name (file, name);
	      || optimize_function_for_size_p (cfun)
	      || !TARGET_BRANCH_PREDICTION_HINTS)
	  x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	      int pred_val = INTVAL (XEXP (x, 0));
	      if (pred_val < REG_BR_PROB_BASE * 45 / 100
		  || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  bool taken = pred_val > REG_BR_PROB_BASE / 2;
		    = final_forward_branch_p (current_output_insn) == 0;
		  /* Emit hints only in the case default branch prediction
		     heuristics would fail.  */
		  if (taken != cputaken)
		      /* We use 3e (DS) prefix for taken branches and
			 2e (CS) prefix for not taken branches.  */
			fputs ("ds ; ", file);
			fputs ("cs ; ", file);
14613 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14619 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14622 /* The kernel uses a different segment register for performance
14623 reasons; a system call would not have to trash the userspace
14624 segment register, which would be expensive. */
14625 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
14626 fputs ("fs", file
);
14628 fputs ("gs", file
);
14632 putc (TARGET_AVX2
? 'i' : 'f', file
);
14636 if (TARGET_64BIT
&& Pmode
!= word_mode
)
14637 fputs ("addr32 ", file
);
14641 output_operand_lossage ("invalid operand code '%c'", code
);
14646 print_reg (x
, code
, file
);
14648 else if (MEM_P (x
))
14650 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14651 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
14652 && GET_MODE (x
) != BLKmode
)
14655 switch (GET_MODE_SIZE (GET_MODE (x
)))
14657 case 1: size
= "BYTE"; break;
14658 case 2: size
= "WORD"; break;
14659 case 4: size
= "DWORD"; break;
14660 case 8: size
= "QWORD"; break;
14661 case 12: size
= "TBYTE"; break;
14663 if (GET_MODE (x
) == XFmode
)
14668 case 32: size
= "YMMWORD"; break;
14670 gcc_unreachable ();
14673 /* Check for explicit size override (codes 'b', 'w', 'k',
14677 else if (code
== 'w')
14679 else if (code
== 'k')
14681 else if (code
== 'q')
14683 else if (code
== 'x')
14686 fputs (size
, file
);
14687 fputs (" PTR ", file
);
14691 /* Avoid (%rip) for call operands. */
14692 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
14693 && !CONST_INT_P (x
))
14694 output_addr_const (file
, x
);
14695 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
14696 output_operand_lossage ("invalid constraints for operand");
14698 output_address (x
);
14701 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
14706 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14707 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
14709 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14711 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14713 fprintf (file
, "0x%08llx", (unsigned long long) (int) l
);
14715 fprintf (file
, "0x%08x", (unsigned int) l
);
14718 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
14723 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14724 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
14726 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14728 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
14731 /* These float cases don't actually occur as immediate operands. */
14732 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
14736 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
14737 fputs (dstr
, file
);
14742 /* We have patterns that allow zero sets of memory, for instance.
14743 In 64-bit mode, we should probably support all 8-byte vectors,
14744 since we can in fact encode that into an immediate. */
14745 if (GET_CODE (x
) == CONST_VECTOR
)
14747 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
14751 if (code
!= 'P' && code
!= 'p')
14753 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
14755 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14758 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
14759 || GET_CODE (x
) == LABEL_REF
)
14761 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14764 fputs ("OFFSET FLAT:", file
);
14767 if (CONST_INT_P (x
))
14768 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
14769 else if (flag_pic
|| MACHOPIC_INDIRECT
)
14770 output_pic_addr_const (file
, x
, code
);
14772 output_addr_const (file
, x
);
ix86_print_operand_punct_valid_p (unsigned char code)
  return (code == '@' || code == '*' || code == '+' || code == '&'
	  || code == ';' || code == '~' || code == '^');
14783 /* Print a memory operand whose address is ADDR. */
14786 ix86_print_operand_address (FILE *file
, rtx addr
)
14788 struct ix86_address parts
;
14789 rtx base
, index
, disp
;
14795 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
14797 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14798 gcc_assert (parts
.index
== NULL_RTX
);
14799 parts
.index
= XVECEXP (addr
, 0, 1);
14800 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
14801 addr
= XVECEXP (addr
, 0, 0);
14804 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
14806 gcc_assert (TARGET_64BIT
);
14807 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14811 ok
= ix86_decompose_address (addr
, &parts
);
14815 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
14817 rtx tmp
= SUBREG_REG (parts
.base
);
14818 parts
.base
= simplify_subreg (GET_MODE (parts
.base
),
14819 tmp
, GET_MODE (tmp
), 0);
14822 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
14824 rtx tmp
= SUBREG_REG (parts
.index
);
14825 parts
.index
= simplify_subreg (GET_MODE (parts
.index
),
14826 tmp
, GET_MODE (tmp
), 0);
14830 index
= parts
.index
;
14832 scale
= parts
.scale
;
14840 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14842 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
14845 gcc_unreachable ();
14848 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14849 if (TARGET_64BIT
&& !base
&& !index
)
14853 if (GET_CODE (disp
) == CONST
14854 && GET_CODE (XEXP (disp
, 0)) == PLUS
14855 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14856 symbol
= XEXP (XEXP (disp
, 0), 0);
14858 if (GET_CODE (symbol
) == LABEL_REF
14859 || (GET_CODE (symbol
) == SYMBOL_REF
14860 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
14863 if (!base
&& !index
)
14865 /* Displacement only requires special attention. */
14867 if (CONST_INT_P (disp
))
14869 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
14870 fputs ("ds:", file
);
14871 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
14874 output_pic_addr_const (file
, disp
, 0);
14876 output_addr_const (file
, disp
);
14880 /* Print SImode register names to force addr32 prefix. */
14881 if (GET_CODE (addr
) == SUBREG
)
14883 gcc_assert (TARGET_64BIT
);
14884 gcc_assert (GET_MODE (addr
) == SImode
);
14885 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
14886 gcc_assert (!code
);
14889 else if (GET_CODE (addr
) == ZERO_EXTEND
14890 || GET_CODE (addr
) == AND
)
14892 gcc_assert (TARGET_64BIT
);
14893 gcc_assert (GET_MODE (addr
) == DImode
);
14894 gcc_assert (!code
);
14898 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14903 output_pic_addr_const (file
, disp
, 0);
14904 else if (GET_CODE (disp
) == LABEL_REF
)
14905 output_asm_label (disp
);
14907 output_addr_const (file
, disp
);
14912 print_reg (base
, code
, file
);
14916 print_reg (index
, vsib
? 0 : code
, file
);
14917 if (scale
!= 1 || vsib
)
14918 fprintf (file
, ",%d", scale
);
14924 rtx offset
= NULL_RTX
;
14928 /* Pull out the offset of a symbol; print any symbol itself. */
14929 if (GET_CODE (disp
) == CONST
14930 && GET_CODE (XEXP (disp
, 0)) == PLUS
14931 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14933 offset
= XEXP (XEXP (disp
, 0), 1);
14934 disp
= gen_rtx_CONST (VOIDmode
,
14935 XEXP (XEXP (disp
, 0), 0));
14939 output_pic_addr_const (file
, disp
, 0);
14940 else if (GET_CODE (disp
) == LABEL_REF
)
14941 output_asm_label (disp
);
14942 else if (CONST_INT_P (disp
))
14945 output_addr_const (file
, disp
);
14951 print_reg (base
, code
, file
);
14954 if (INTVAL (offset
) >= 0)
14956 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14960 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14967 print_reg (index
, vsib
? 0 : code
, file
);
14968 if (scale
!= 1 || vsib
)
14969 fprintf (file
, "*%d", scale
);
14976 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14979 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
14983 if (GET_CODE (x
) != UNSPEC
)
14986 op
= XVECEXP (x
, 0, 0);
14987 switch (XINT (x
, 1))
14989 case UNSPEC_GOTTPOFF
:
14990 output_addr_const (file
, op
);
14991 /* FIXME: This might be @TPOFF in Sun ld. */
14992 fputs ("@gottpoff", file
);
14995 output_addr_const (file
, op
);
14996 fputs ("@tpoff", file
);
14998 case UNSPEC_NTPOFF
:
14999 output_addr_const (file
, op
);
15001 fputs ("@tpoff", file
);
15003 fputs ("@ntpoff", file
);
15005 case UNSPEC_DTPOFF
:
15006 output_addr_const (file
, op
);
15007 fputs ("@dtpoff", file
);
15009 case UNSPEC_GOTNTPOFF
:
15010 output_addr_const (file
, op
);
15012 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
15013 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
15015 fputs ("@gotntpoff", file
);
15017 case UNSPEC_INDNTPOFF
:
15018 output_addr_const (file
, op
);
15019 fputs ("@indntpoff", file
);
15022 case UNSPEC_MACHOPIC_OFFSET
:
15023 output_addr_const (file
, op
);
15025 machopic_output_function_base_name (file
);
15029 case UNSPEC_STACK_CHECK
:
15033 gcc_assert (flag_split_stack
);
15035 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15036 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
15038 gcc_unreachable ();
15041 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
/* Split one or more double-mode RTL references into pairs of half-mode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */
split_double_mode (enum machine_mode mode, rtx operands[],
		   int num, rtx lo_half[], rtx hi_half[])
  enum machine_mode half_mode;

      half_mode = DImode;
      half_mode = SImode;
      gcc_unreachable ();

  byte = GET_MODE_SIZE (half_mode);

      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
	  lo_half[num] = adjust_address (op, half_mode, 0);
	  hi_half[num] = adjust_address (op, half_mode, byte);
	  lo_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), byte);
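
/* Usage illustration (a minimal sketch, not quoted from any particular
   caller): a splitter that lowers a DImode move into two half-mode moves
   could use this roughly as

     rtx lo[2], hi[2];
     split_double_mode (DImode, operands, 2, lo, hi);
     emit_move_insn (lo[0], lo[1]);
     emit_move_insn (hi[0], hi[1]);

   where lo[]/hi[] hold the low and high halves of the destination and
   source; any overlap or ordering handling a real splitter needs is
   omitted here.  */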
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
15120 output_387_binary_op (rtx insn
, rtx
*operands
)
15122 static char buf
[40];
15125 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
15127 #ifdef ENABLE_CHECKING
15128 /* Even if we do not want to check the inputs, this documents input
15129 constraints. Which helps in understanding the following code. */
15130 if (STACK_REG_P (operands
[0])
15131 && ((REG_P (operands
[1])
15132 && REGNO (operands
[0]) == REGNO (operands
[1])
15133 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
15134 || (REG_P (operands
[2])
15135 && REGNO (operands
[0]) == REGNO (operands
[2])
15136 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
15137 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
15140 gcc_assert (is_sse
);
15143 switch (GET_CODE (operands
[3]))
15146 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15147 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15155 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15156 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15164 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15165 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15173 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15174 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15182 gcc_unreachable ();
15189 strcpy (buf
, ssep
);
15190 if (GET_MODE (operands
[0]) == SFmode
)
15191 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
15193 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
15197 strcpy (buf
, ssep
+ 1);
15198 if (GET_MODE (operands
[0]) == SFmode
)
15199 strcat (buf
, "ss\t{%2, %0|%0, %2}");
15201 strcat (buf
, "sd\t{%2, %0|%0, %2}");
15207 switch (GET_CODE (operands
[3]))
15211 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
15213 rtx temp
= operands
[2];
15214 operands
[2] = operands
[1];
15215 operands
[1] = temp
;
15218 /* know operands[0] == operands[1]. */
15220 if (MEM_P (operands
[2]))
15226 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15228 if (STACK_TOP_P (operands
[0]))
15229 /* How is it that we are storing to a dead operand[2]?
15230 Well, presumably operands[1] is dead too. We can't
15231 store the result to st(0) as st(0) gets popped on this
15232 instruction. Instead store to operands[2] (which I
15233 think has to be st(1)). st(1) will be popped later.
15234 gcc <= 2.8.1 didn't have this check and generated
15235 assembly code that the Unixware assembler rejected. */
15236 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15238 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15242 if (STACK_TOP_P (operands
[0]))
15243 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15245 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15250 if (MEM_P (operands
[1]))
15256 if (MEM_P (operands
[2]))
15262 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15265 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15266 derived assemblers, confusingly reverse the direction of
15267 the operation for fsub{r} and fdiv{r} when the
15268 destination register is not st(0). The Intel assembler
15269 doesn't have this brain damage. Read !SYSV386_COMPAT to
15270 figure out what the hardware really does. */
15271 if (STACK_TOP_P (operands
[0]))
15272 p
= "{p\t%0, %2|rp\t%2, %0}";
15274 p
= "{rp\t%2, %0|p\t%0, %2}";
15276 if (STACK_TOP_P (operands
[0]))
15277 /* As above for fmul/fadd, we can't store to st(0). */
15278 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15280 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15285 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15288 if (STACK_TOP_P (operands
[0]))
15289 p
= "{rp\t%0, %1|p\t%1, %0}";
15291 p
= "{p\t%1, %0|rp\t%0, %1}";
15293 if (STACK_TOP_P (operands
[0]))
15294 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15296 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15301 if (STACK_TOP_P (operands
[0]))
15303 if (STACK_TOP_P (operands
[1]))
15304 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15306 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15309 else if (STACK_TOP_P (operands
[1]))
15312 p
= "{\t%1, %0|r\t%0, %1}";
15314 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15320 p
= "{r\t%2, %0|\t%0, %2}";
15322 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15328 gcc_unreachable ();
/* Return needed mode for entity in optimize_mode_switching pass.  */
ix86_mode_needed (int entity, rtx insn)
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store the control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no changes
     in the bits we are interested in.  */
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

      if (mode == I387_CW_TRUNC)
      if (mode == I387_CW_FLOOR)
      if (mode == I387_CW_CEIL)
      if (mode == I387_CW_MASK_PM)
      gcc_unreachable ();

  return I387_CW_ANY;
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to the current control word,
   while NEW_MODE is set to the new control word.  */
emit_i387_cw_initialization (int mode)
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_function_for_size_p (cfun))
      case I387_CW_TRUNC:
	/* round toward zero (truncate) */
	emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	slot = SLOT_CW_TRUNC;

      case I387_CW_FLOOR:
	/* round down toward -oo */
	emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	slot = SLOT_CW_FLOOR;

	/* round up toward +oo */
	emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	slot = SLOT_CW_CEIL;

      case I387_CW_MASK_PM:
	/* mask precision exception for nearbyint() */
	emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	slot = SLOT_CW_MASK_PM;

	gcc_unreachable ();

      case I387_CW_TRUNC:
	/* round toward zero (truncate) */
	emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	slot = SLOT_CW_TRUNC;

      case I387_CW_FLOOR:
	/* round down toward -oo */
	emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	slot = SLOT_CW_FLOOR;

	/* round up toward +oo */
	emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	slot = SLOT_CW_CEIL;

      case I387_CW_MASK_PM:
	/* mask precision exception for nearbyint() */
	emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	slot = SLOT_CW_MASK_PM;

	gcc_unreachable ();

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
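
/* For reference (summary only, not additional functionality): the x87
   control word fields manipulated above are

     0x0c00   rounding control field (bits 10-11)
     0x0400   RC = 01, round down toward -oo (floor)
     0x0800   RC = 10, round up toward +oo (ceil)
     0x0c00   RC = 11, round toward zero (trunc)
     0x0020   PM bit, masks the precision exception (nearbyint)

   and the default RC = 00 rounds to nearest even.  */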
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */
output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

    output_asm_insn ("fisttp%Z0\t%0", operands);

      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%Z0\t%0", operands);
	output_asm_insn ("fist%Z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";

      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (FP_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);

  return opno ? "fstp\t%y1" : "fstp\t%y0";
15544 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15545 should be used. UNORDERED_P is true when fucom should be used. */
15548 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
15550 int stack_top_dies
;
15551 rtx cmp_op0
, cmp_op1
;
15552 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
15556 cmp_op0
= operands
[0];
15557 cmp_op1
= operands
[1];
15561 cmp_op0
= operands
[1];
15562 cmp_op1
= operands
[2];
15567 if (GET_MODE (operands
[0]) == SFmode
)
15569 return "%vucomiss\t{%1, %0|%0, %1}";
15571 return "%vcomiss\t{%1, %0|%0, %1}";
15574 return "%vucomisd\t{%1, %0|%0, %1}";
15576 return "%vcomisd\t{%1, %0|%0, %1}";
15579 gcc_assert (STACK_TOP_P (cmp_op0
));
15581 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15583 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
15585 if (stack_top_dies
)
15587 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
15588 return output_387_ffreep (operands
, 1);
15591 return "ftst\n\tfnstsw\t%0";
15594 if (STACK_REG_P (cmp_op1
)
15596 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
15597 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
15599 /* If both the top of the 387 stack dies, and the other operand
15600 is also a stack register that dies, then this must be a
15601 `fcompp' float compare */
15605 /* There is no double popping fcomi variant. Fortunately,
15606 eflags is immune from the fstp's cc clobbering. */
15608 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
15610 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
15611 return output_387_ffreep (operands
, 0);
15616 return "fucompp\n\tfnstsw\t%0";
15618 return "fcompp\n\tfnstsw\t%0";
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
	  "fcom%Z2\t%y2\n\tfnstsw\t%0",
	  "fcomp%Z2\t%y2\n\tfnstsw\t%0",
	  "fucom%Z2\t%y2\n\tfnstsw\t%0",
	  "fucomp%Z2\t%y2\n\tfnstsw\t%0",

	  "ficom%Z2\t%y2\n\tfnstsw\t%0",
	  "ficomp%Z2\t%y2\n\tfnstsw\t%0",

	  "fcomi\t{%y1, %0|%0, %y1}",
	  "fcomip\t{%y1, %0|%0, %y1}",
	  "fucomi\t{%y1, %0|%0, %y1}",
	  "fucomip\t{%y1, %0|%0, %y1}",

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
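
      /* A worked example of the index computed above (illustrative only):
	 an ordered compare against a stack register with fcomi available
	 and the top of stack dying gives

	   mask = (1 << 3) | (0 << 2) | (0 << 1) | 1;

	 i.e. mask == 9, which selects the popping "fcomip" alternative
	 from the table.  */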
15665 ix86_output_addr_vec_elt (FILE *file
, int value
)
15667 const char *directive
= ASM_LONG
;
15671 directive
= ASM_QUAD
;
15673 gcc_assert (!TARGET_64BIT
);
15676 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
15680 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
15682 const char *directive
= ASM_LONG
;
15685 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
15686 directive
= ASM_QUAD
;
15688 gcc_assert (!TARGET_64BIT
);
15690 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15691 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
15692 fprintf (file
, "%s%s%d-%s%d\n",
15693 directive
, LPREFIX
, value
, LPREFIX
, rel
);
15694 else if (HAVE_AS_GOTOFF_IN_DATA
)
15695 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
15697 else if (TARGET_MACHO
)
15699 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
15700 machopic_output_function_base_name (file
);
15705 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
15706 GOT_SYMBOL_NAME
, LPREFIX
, value
);
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate.  */
ix86_expand_clear (rtx dest)
  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
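
/* In assembly terms the two paths above amount to (illustration only):

     xorl %eax, %eax
     movl $0, %eax

   the xor form is shorter but clobbers the flags, hence the added CLOBBER
   of FLAGS_REG; the mov form is used when TARGET_USE_MOV0 asks for it and
   leaves the flags intact.  */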
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */
maybe_get_pool_constant (rtx x)
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);
15750 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
15753 enum tls_model model
;
15758 if (GET_CODE (op1
) == SYMBOL_REF
)
15760 model
= SYMBOL_REF_TLS_MODEL (op1
);
15763 op1
= legitimize_tls_address (op1
, model
, true);
15764 op1
= force_operand (op1
, op0
);
15767 if (GET_MODE (op1
) != mode
)
15768 op1
= convert_to_mode (mode
, op1
, 1);
15770 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15771 && SYMBOL_REF_DLLIMPORT_P (op1
))
15772 op1
= legitimize_dllimport_symbol (op1
, false);
15774 else if (GET_CODE (op1
) == CONST
15775 && GET_CODE (XEXP (op1
, 0)) == PLUS
15776 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
15778 rtx addend
= XEXP (XEXP (op1
, 0), 1);
15779 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
15782 model
= SYMBOL_REF_TLS_MODEL (symbol
);
15784 tmp
= legitimize_tls_address (symbol
, model
, true);
15785 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15786 && SYMBOL_REF_DLLIMPORT_P (symbol
))
15787 tmp
= legitimize_dllimport_symbol (symbol
, true);
15791 tmp
= force_operand (tmp
, NULL
);
15792 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
15793 op0
, 1, OPTAB_DIRECT
);
15796 if (GET_MODE (tmp
) != mode
)
15797 op1
= convert_to_mode (mode
, tmp
, 1);
15801 if ((flag_pic
|| MACHOPIC_INDIRECT
)
15802 && symbolic_operand (op1
, mode
))
15804 if (TARGET_MACHO
&& !TARGET_64BIT
)
15807 /* dynamic-no-pic */
15808 if (MACHOPIC_INDIRECT
)
15810 rtx temp
= ((reload_in_progress
15811 || ((op0
&& REG_P (op0
))
15813 ? op0
: gen_reg_rtx (Pmode
));
15814 op1
= machopic_indirect_data_reference (op1
, temp
);
15816 op1
= machopic_legitimize_pic_address (op1
, mode
,
15817 temp
== op1
? 0 : temp
);
15819 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
15821 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
15825 if (GET_CODE (op0
) == MEM
)
15826 op1
= force_reg (Pmode
, op1
);
15830 if (GET_CODE (temp
) != REG
)
15831 temp
= gen_reg_rtx (Pmode
);
15832 temp
= legitimize_pic_address (op1
, temp
);
15837 /* dynamic-no-pic */
15843 op1
= force_reg (mode
, op1
);
15844 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
15846 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
15847 op1
= legitimize_pic_address (op1
, reg
);
15850 if (GET_MODE (op1
) != mode
)
15851 op1
= convert_to_mode (mode
, op1
, 1);
15858 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
15859 || !push_operand (op0
, mode
))
15861 op1
= force_reg (mode
, op1
);
15863 if (push_operand (op0
, mode
)
15864 && ! general_no_elim_operand (op1
, mode
))
15865 op1
= copy_to_mode_reg (mode
, op1
);
15867 /* Force large constants in 64bit compilation into register
15868 to get them CSEed. */
15869 if (can_create_pseudo_p ()
15870 && (mode
== DImode
) && TARGET_64BIT
15871 && immediate_operand (op1
, mode
)
15872 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
15873 && !register_operand (op0
, mode
)
15875 op1
= copy_to_mode_reg (mode
, op1
);
15877 if (can_create_pseudo_p ()
15878 && FLOAT_MODE_P (mode
)
15879 && GET_CODE (op1
) == CONST_DOUBLE
)
15881 /* If we are loading a floating point constant to a register,
15882 force the value to memory now, since we'll get better code
15883 out the back end. */
15885 op1
= validize_mem (force_const_mem (mode
, op1
));
15886 if (!register_operand (op0
, mode
))
15888 rtx temp
= gen_reg_rtx (mode
);
15889 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
15890 emit_move_insn (op0
, temp
);
15896 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
15900 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
15902 rtx op0
= operands
[0], op1
= operands
[1];
15903 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
15905 /* Force constants other than zero into memory. We do not know how
15906 the instructions used to build constants modify the upper 64 bits
15907 of the register, once we have that information we may be able
15908 to handle some of them more efficiently. */
15909 if (can_create_pseudo_p ()
15910 && register_operand (op0
, mode
)
15911 && (CONSTANT_P (op1
)
15912 || (GET_CODE (op1
) == SUBREG
15913 && CONSTANT_P (SUBREG_REG (op1
))))
15914 && !standard_sse_constant_p (op1
))
15915 op1
= validize_mem (force_const_mem (mode
, op1
));
15917 /* We need to check memory alignment for SSE mode since attribute
15918 can make operands unaligned. */
15919 if (can_create_pseudo_p ()
15920 && SSE_REG_MODE_P (mode
)
15921 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
15922 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
15926 /* ix86_expand_vector_move_misalign() does not like constants ... */
15927 if (CONSTANT_P (op1
)
15928 || (GET_CODE (op1
) == SUBREG
15929 && CONSTANT_P (SUBREG_REG (op1
))))
15930 op1
= validize_mem (force_const_mem (mode
, op1
));
15932 /* ... nor both arguments in memory. */
15933 if (!register_operand (op0
, mode
)
15934 && !register_operand (op1
, mode
))
15935 op1
= force_reg (mode
, op1
);
15937 tmp
[0] = op0
; tmp
[1] = op1
;
15938 ix86_expand_vector_move_misalign (mode
, tmp
);
15942 /* Make operand1 a register if it isn't already. */
15943 if (can_create_pseudo_p ()
15944 && !register_operand (op0
, mode
)
15945 && !register_operand (op1
, mode
))
15947 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
15951 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
15954 /* Split 32-byte AVX unaligned load and store if needed. */
15957 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
15960 rtx (*extract
) (rtx
, rtx
, rtx
);
15961 rtx (*move_unaligned
) (rtx
, rtx
);
15962 enum machine_mode mode
;
15964 switch (GET_MODE (op0
))
15967 gcc_unreachable ();
15969 extract
= gen_avx_vextractf128v32qi
;
15970 move_unaligned
= gen_avx_movdqu256
;
15974 extract
= gen_avx_vextractf128v8sf
;
15975 move_unaligned
= gen_avx_movups256
;
15979 extract
= gen_avx_vextractf128v4df
;
15980 move_unaligned
= gen_avx_movupd256
;
15985 if (MEM_P (op1
) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
15987 rtx r
= gen_reg_rtx (mode
);
15988 m
= adjust_address (op1
, mode
, 0);
15989 emit_move_insn (r
, m
);
15990 m
= adjust_address (op1
, mode
, 16);
15991 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
15992 emit_move_insn (op0
, r
);
15994 else if (MEM_P (op0
) && TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
15996 m
= adjust_address (op0
, mode
, 0);
15997 emit_insn (extract (m
, op1
, const0_rtx
));
15998 m
= adjust_address (op0
, mode
, 16);
15999 emit_insn (extract (m
, op1
, const1_rtx
));
16002 emit_insn (move_unaligned (op0
, op1
));
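
/* In instruction terms the split paths above amount to roughly
   (a sketch of the intent, not the exact emitted RTL):

     load:   vmovups      (mem), %xmm0
	     vinsertf128  $1, 16(mem), %ymm0, %ymm0
     store:  vmovups      %xmm0, (mem)
	     vextractf128 $1, %ymm0, 16(mem)

   instead of a single 32-byte vmovups/vmovupd/vmovdqu, which some AVX
   implementations handle less efficiently for unaligned addresses.  */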
16005 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16006 straight to ix86_expand_vector_move. */
16007 /* Code generation for scalar reg-reg moves of single and double precision data:
16008 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16012 if (x86_sse_partial_reg_dependency == true)
16017 Code generation for scalar loads of double precision data:
16018 if (x86_sse_split_regs == true)
16019 movlpd mem, reg (gas syntax)
16023 Code generation for unaligned packed loads of single precision data
16024 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16025 if (x86_sse_unaligned_move_optimal)
16028 if (x86_sse_partial_reg_dependency == true)
16040 Code generation for unaligned packed loads of double precision data
16041 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16042 if (x86_sse_unaligned_move_optimal)
16045 if (x86_sse_split_regs == true)
16058 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
16066 && GET_MODE_SIZE (mode
) == 32)
16068 switch (GET_MODE_CLASS (mode
))
16070 case MODE_VECTOR_INT
:
16072 op0
= gen_lowpart (V32QImode
, op0
);
16073 op1
= gen_lowpart (V32QImode
, op1
);
16076 case MODE_VECTOR_FLOAT
:
16077 ix86_avx256_split_vector_move_misalign (op0
, op1
);
16081 gcc_unreachable ();
16089 /* ??? If we have typed data, then it would appear that using
16090 movdqu is the only way to get unaligned data loaded with
16092 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16094 op0
= gen_lowpart (V16QImode
, op0
);
16095 op1
= gen_lowpart (V16QImode
, op1
);
16096 /* We will eventually emit movups based on insn attributes. */
16097 emit_insn (gen_sse2_movdqu (op0
, op1
));
16099 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16104 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16105 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16106 || optimize_function_for_size_p (cfun
))
16108 /* We will eventually emit movups based on insn attributes. */
16109 emit_insn (gen_sse2_movupd (op0
, op1
));
16113 /* When SSE registers are split into halves, we can avoid
16114 writing to the top half twice. */
16115 if (TARGET_SSE_SPLIT_REGS
)
16117 emit_clobber (op0
);
16122 /* ??? Not sure about the best option for the Intel chips.
16123 The following would seem to satisfy; the register is
16124 entirely cleared, breaking the dependency chain. We
16125 then store to the upper half, with a dependency depth
16126 of one. A rumor has it that Intel recommends two movsd
16127 followed by an unpacklpd, but this is unconfirmed. And
16128 given that the dependency depth of the unpacklpd would
16129 still be one, I'm not sure why this would be better. */
16130 zero
= CONST0_RTX (V2DFmode
);
16133 m
= adjust_address (op1
, DFmode
, 0);
16134 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
16135 m
= adjust_address (op1
, DFmode
, 8);
16136 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
16141 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16142 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16143 || optimize_function_for_size_p (cfun
))
16145 op0
= gen_lowpart (V4SFmode
, op0
);
16146 op1
= gen_lowpart (V4SFmode
, op1
);
16147 emit_insn (gen_sse_movups (op0
, op1
));
16151 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
16152 emit_move_insn (op0
, CONST0_RTX (mode
));
16154 emit_clobber (op0
);
16156 if (mode
!= V4SFmode
)
16157 op0
= gen_lowpart (V4SFmode
, op0
);
16159 m
= adjust_address (op1
, V2SFmode
, 0);
16160 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
16161 m
= adjust_address (op1
, V2SFmode
, 8);
16162 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
16165 else if (MEM_P (op0
))
16167 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16169 op0
= gen_lowpart (V16QImode
, op0
);
16170 op1
= gen_lowpart (V16QImode
, op1
);
16171 /* We will eventually emit movups based on insn attributes. */
16172 emit_insn (gen_sse2_movdqu (op0
, op1
));
16174 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16177 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16178 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16179 || optimize_function_for_size_p (cfun
))
16180 /* We will eventually emit movups based on insn attributes. */
16181 emit_insn (gen_sse2_movupd (op0
, op1
));
16184 m
= adjust_address (op0
, DFmode
, 0);
16185 emit_insn (gen_sse2_storelpd (m
, op1
));
16186 m
= adjust_address (op0
, DFmode
, 8);
16187 emit_insn (gen_sse2_storehpd (m
, op1
));
16192 if (mode
!= V4SFmode
)
16193 op1
= gen_lowpart (V4SFmode
, op1
);
16196 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16197 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16198 || optimize_function_for_size_p (cfun
))
16200 op0
= gen_lowpart (V4SFmode
, op0
);
16201 emit_insn (gen_sse_movups (op0
, op1
));
16205 m
= adjust_address (op0
, V2SFmode
, 0);
16206 emit_insn (gen_sse_storelps (m
, op1
));
16207 m
= adjust_address (op0
, V2SFmode
, 8);
16208 emit_insn (gen_sse_storehps (m
, op1
));
16213 gcc_unreachable ();
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */
ix86_expand_push (enum machine_mode mode, rtx x)
  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);

  /* When we push an operand onto stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
  emit_move_insn (tmp, x);
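
/* The expansion above corresponds to the classic two instruction push
   sequence (illustration only):

     sub  $SIZE, %esp
     mov  x, (%esp)

   where SIZE is GET_MODE_SIZE (mode).  */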
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
  if (rtx_equal_p (dst, src2))

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
  if (immediate_operand (src1, mode))

  /* Lowest priority is that memory references should come second.  */
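
/* For example (an illustration of the priorities above, not a quotation
   of a caller): given the commutative operation

     operands[0] = reg B,  operands[1] = mem A,  operands[2] = reg B

   ix86_swap_binary_operands_p returns true, so the caller swaps src1 and
   src2; the destination then matches src1 and the memory reference ends
   up second, which is what the reg-with-mem insn forms want.  */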
16277 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16278 destination to use for the operation. If different from the true
16279 destination in operands[0], a copy operation will be required. */
16282 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
16285 rtx dst
= operands
[0];
16286 rtx src1
= operands
[1];
16287 rtx src2
= operands
[2];
16289 /* Canonicalize operand order. */
16290 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16294 /* It is invalid to swap operands of different modes. */
16295 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
16302 /* Both source operands cannot be in memory. */
16303 if (MEM_P (src1
) && MEM_P (src2
))
16305 /* Optimization: Only read from memory once. */
16306 if (rtx_equal_p (src1
, src2
))
16308 src2
= force_reg (mode
, src2
);
16312 src2
= force_reg (mode
, src2
);
16315 /* If the destination is memory, and we do not have matching source
16316 operands, do things in registers. */
16317 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16318 dst
= gen_reg_rtx (mode
);
16320 /* Source 1 cannot be a constant. */
16321 if (CONSTANT_P (src1
))
16322 src1
= force_reg (mode
, src1
);
16324 /* Source 1 cannot be a non-matching memory. */
16325 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16326 src1
= force_reg (mode
, src1
);
16328 /* Improve address combine. */
16330 && GET_MODE_CLASS (mode
) == MODE_INT
16332 src2
= force_reg (mode
, src2
);
16334 operands
[1] = src1
;
16335 operands
[2] = src2
;
16339 /* Similarly, but assume that the destination has already been
16340 set up properly. */
16343 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
16344 enum machine_mode mode
, rtx operands
[])
16346 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16347 gcc_assert (dst
== operands
[0]);
16350 /* Attempt to expand a binary operator. Make the expansion closer to the
16351 actual machine, then just general_operand, which will allow 3 separate
16352 memory references (one output, two input) in a single insn. */
16355 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
16358 rtx src1
, src2
, dst
, op
, clob
;
16360 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16361 src1
= operands
[1];
16362 src2
= operands
[2];
16364 /* Emit the instruction. */
16366 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
16367 if (reload_in_progress
)
16369 /* Reload doesn't know about the flags register, and doesn't know that
16370 it doesn't want to clobber it. We can only do this with PLUS. */
16371 gcc_assert (code
== PLUS
);
16374 else if (reload_completed
16376 && !rtx_equal_p (dst
, src1
))
16378 /* This is going to be an LEA; avoid splitting it later. */
16383 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16384 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16387 /* Fix up the destination if needed. */
16388 if (dst
!= operands
[0])
16389 emit_move_insn (operands
[0], dst
);
16392 /* Return TRUE or FALSE depending on whether the binary operator meets the
16393 appropriate constraints. */
16396 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
16399 rtx dst
= operands
[0];
16400 rtx src1
= operands
[1];
16401 rtx src2
= operands
[2];
16403 /* Both source operands cannot be in memory. */
16404 if (MEM_P (src1
) && MEM_P (src2
))
16407 /* Canonicalize operand order for commutative operators. */
16408 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16415 /* If the destination is memory, we must have a matching source operand. */
16416 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16419 /* Source 1 cannot be a constant. */
16420 if (CONSTANT_P (src1
))
16423 /* Source 1 cannot be a non-matching memory. */
16424 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16425 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16426 return (code
== AND
16429 || (TARGET_64BIT
&& mode
== DImode
))
16430 && satisfies_constraint_L (src2
));
16435 /* Attempt to expand a unary operator. Make the expansion closer to the
16436 actual machine, then just general_operand, which will allow 2 separate
16437 memory references (one output, one input) in a single insn. */
16440 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
16443 int matching_memory
;
16444 rtx src
, dst
, op
, clob
;
16449 /* If the destination is memory, and we do not have matching source
16450 operands, do things in registers. */
16451 matching_memory
= 0;
16454 if (rtx_equal_p (dst
, src
))
16455 matching_memory
= 1;
16457 dst
= gen_reg_rtx (mode
);
16460 /* When source operand is memory, destination must match. */
16461 if (MEM_P (src
) && !matching_memory
)
16462 src
= force_reg (mode
, src
);
16464 /* Emit the instruction. */
16466 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
16467 if (reload_in_progress
|| code
== NOT
)
16469 /* Reload doesn't know about the flags register, and doesn't know that
16470 it doesn't want to clobber it. */
16471 gcc_assert (code
== NOT
);
16476 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16477 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16480 /* Fix up the destination if needed. */
16481 if (dst
!= operands
[0])
16482 emit_move_insn (operands
[0], dst
);
16485 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16486 divisor are within the range [0-255]. */
16489 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
16492 rtx end_label
, qimode_label
;
16493 rtx insn
, div
, mod
;
16494 rtx scratch
, tmp0
, tmp1
, tmp2
;
16495 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
16496 rtx (*gen_zero_extend
) (rtx
, rtx
);
16497 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
16502 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
16503 gen_test_ccno_1
= gen_testsi_ccno_1
;
16504 gen_zero_extend
= gen_zero_extendqisi2
;
16507 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
16508 gen_test_ccno_1
= gen_testdi_ccno_1
;
16509 gen_zero_extend
= gen_zero_extendqidi2
;
16512 gcc_unreachable ();
16515 end_label
= gen_label_rtx ();
16516 qimode_label
= gen_label_rtx ();
16518 scratch
= gen_reg_rtx (mode
);
  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
16522 emit_move_insn (scratch
, operands
[2]);
16523 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
16524 scratch
, 1, OPTAB_DIRECT
);
16525 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
16526 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
16527 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
16528 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
16529 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
16531 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
16532 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
16533 JUMP_LABEL (insn
) = qimode_label
;
  /* Generate original signed/unsigned divmod.  */
16536 div
= gen_divmod4_1 (operands
[0], operands
[1],
16537 operands
[2], operands
[3]);
16540 /* Branch to the end. */
16541 emit_jump_insn (gen_jump (end_label
));
16544 /* Generate 8bit unsigned divide. */
16545 emit_label (qimode_label
);
16546 /* Don't use operands[0] for result of 8bit divide since not all
16547 registers support QImode ZERO_EXTRACT. */
16548 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
16549 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
16550 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
16551 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
16555 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
16556 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
16560 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
16561 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
16564 /* Extract remainder from AH. */
16565 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
16566 if (REG_P (operands
[1]))
16567 insn
= emit_move_insn (operands
[1], tmp1
);
16570 /* Need a new scratch register since the old one has result
16572 scratch
= gen_reg_rtx (mode
);
16573 emit_move_insn (scratch
, tmp1
);
16574 insn
= emit_move_insn (operands
[1], scratch
);
16576 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
16578 /* Zero extend quotient from AL. */
16579 tmp1
= gen_lowpart (QImode
, tmp0
);
16580 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
16581 set_unique_reg_note (insn
, REG_EQUAL
, div
);
16583 emit_label (end_label
);
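
/* At the source level the transformation above behaves roughly like the
   following sketch (simplified; the real test is a single TEST against
   -0x100 on the IOR of the operands):

     if (((unsigned) a | (unsigned) b) < 0x100)
       {
	 q = (unsigned char) a / (unsigned char) b;
	 r = (unsigned char) a % (unsigned char) b;
       }
     else
       {
	 q = a / b;
	 r = a % b;
       }

   so small operands take the much cheaper 8-bit divide, with the quotient
   in AL and the remainder in AH.  */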
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to the next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx prev, rtx next, unsigned int distance)
  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
    for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
      if (!DF_REF_IS_ARTIFICIAL (*def_rec)
	  && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
	return distance + (distance & 1) + 2;

  return distance + 1;
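
/* A small worked example of the accounting above (illustration): with
   distance == 3 (1.5 cycles so far), two independent insns advance it to
   4, while a def/use dependency rounds up to the next full cycle first:
   3 + (3 & 1) + 2 == 6.  */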
16615 /* Function checks if instruction INSN defines register number
16616 REGNO1 or REGNO2. */
16619 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
16624 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
16625 if (DF_REF_REG_DEF_P (*def_rec
)
16626 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
16627 && (regno1
== DF_REF_REGNO (*def_rec
)
16628 || regno2
== DF_REF_REGNO (*def_rec
)))
16636 /* Function checks if instruction INSN uses register number
16637 REGNO as a part of address expression. */
16640 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
16644 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
16645 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
16651 /* Search backward for non-agu definition of register number REGNO1
16652 or register number REGNO2 in basic block starting from instruction
16653 START up to head of basic block or instruction INSN.
16655 Function puts true value into *FOUND var if definition was found
16656 and false otherwise.
16658 Distance in half-cycles between START and found instruction or head
16659 of BB is added to DISTANCE and returned. */
16662 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
16663 rtx insn
, int distance
,
16664 rtx start
, bool *found
)
16666 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16674 && distance
< LEA_SEARCH_THRESHOLD
)
16676 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
16678 distance
= increase_distance (prev
, next
, distance
);
16679 if (insn_defines_reg (regno1
, regno2
, prev
))
16681 if (recog_memoized (prev
) < 0
16682 || get_attr_type (prev
) != TYPE_LEA
)
16691 if (prev
== BB_HEAD (bb
))
16694 prev
= PREV_INSN (prev
);
16700 /* Search backward for non-agu definition of register number REGNO1
16701 or register number REGNO2 in INSN's basic block until
16702 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16703 2. Reach neighbour BBs boundary, or
16704 3. Reach agu definition.
16705 Returns the distance between the non-agu definition point and INSN.
16706 If no definition point, returns -1. */
16709 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
16712 basic_block bb
= BLOCK_FOR_INSN (insn
);
16714 bool found
= false;
16716 if (insn
!= BB_HEAD (bb
))
16717 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
16718 distance
, PREV_INSN (insn
),
16721 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
16725 bool simple_loop
= false;
16727 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16730 simple_loop
= true;
16735 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
16737 BB_END (bb
), &found
);
16740 int shortest_dist
= -1;
16741 bool found_in_bb
= false;
16743 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16746 = distance_non_agu_define_in_bb (regno1
, regno2
,
16752 if (shortest_dist
< 0)
16753 shortest_dist
= bb_dist
;
16754 else if (bb_dist
> 0)
16755 shortest_dist
= MIN (bb_dist
, shortest_dist
);
16761 distance
= shortest_dist
;
16765 /* get_attr_type may modify recog data. We want to make sure
16766 that recog data is valid for instruction INSN, on which
16767 distance_non_agu_define is called. INSN is unchanged here. */
16768 extract_insn_cached (insn
);
16773 return distance
>> 1;
16776 /* Return the distance in half-cycles between INSN and the next
16777 insn that uses register number REGNO in memory address added
16778 to DISTANCE. Return -1 if REGNO0 is set.
16780 Put true value into *FOUND if register usage was found and
16782 Put true value into *REDEFINED if register redefinition was
16783 found and false otherwise. */
16786 distance_agu_use_in_bb (unsigned int regno
,
16787 rtx insn
, int distance
, rtx start
,
16788 bool *found
, bool *redefined
)
16790 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16795 *redefined
= false;
16799 && distance
< LEA_SEARCH_THRESHOLD
)
16801 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
16803 distance
= increase_distance(prev
, next
, distance
);
16804 if (insn_uses_reg_mem (regno
, next
))
16806 /* Return DISTANCE if OP0 is used in memory
16807 address in NEXT. */
16812 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
16814 /* Return -1 if OP0 is set in NEXT. */
16822 if (next
== BB_END (bb
))
16825 next
= NEXT_INSN (next
);
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in a memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
16836 distance_agu_use (unsigned int regno0
, rtx insn
)
16838 basic_block bb
= BLOCK_FOR_INSN (insn
);
16840 bool found
= false;
16841 bool redefined
= false;
16843 if (insn
!= BB_END (bb
))
16844 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
16846 &found
, &redefined
);
16848 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
16852 bool simple_loop
= false;
16854 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
16857 simple_loop
= true;
16862 distance
= distance_agu_use_in_bb (regno0
, insn
,
16863 distance
, BB_HEAD (bb
),
16864 &found
, &redefined
);
16867 int shortest_dist
= -1;
16868 bool found_in_bb
= false;
16869 bool redefined_in_bb
= false;
16871 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
16874 = distance_agu_use_in_bb (regno0
, insn
,
16875 distance
, BB_HEAD (e
->dest
),
16876 &found_in_bb
, &redefined_in_bb
);
16879 if (shortest_dist
< 0)
16880 shortest_dist
= bb_dist
;
16881 else if (bb_dist
> 0)
16882 shortest_dist
= MIN (bb_dist
, shortest_dist
);
16888 distance
= shortest_dist
;
16892 if (!found
|| redefined
)
16895 return distance
>> 1;
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is preferred over LEA.
   Positive value: LEA is preferred over ADD.  */
#define IX86_LEA_PRIORITY 0
16905 /* Return true if usage of lea INSN has performance advantage
16906 over a sequence of instructions. Instructions sequence has
16907 SPLIT_COST cycles higher latency than lea latency. */
16910 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
16911 unsigned int regno2
, int split_cost
)
16913 int dist_define
, dist_use
;
16915 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
16916 dist_use
= distance_agu_use (regno0
, insn
);
16918 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
16920 /* If there is no non AGU operand definition, no AGU
16921 operand usage and split cost is 0 then both lea
16922 and non lea variants have same priority. Currently
16923 we prefer lea for 64 bit code and non lea on 32 bit
16925 if (dist_use
< 0 && split_cost
== 0)
16926 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
  /* With a longer definition distance, lea is preferable.
     Here we change it to take into account splitting cost and
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in a memory address then we just check
     that the split cost does not exceed the AGU stall.  */
    return dist_define >= LEA_MAX_STALL;
16941 /* If this insn has both backward non-agu dependence and forward
16942 agu dependence, the one with short distance takes effect. */
16943 return dist_define
>= dist_use
;
16946 /* Return true if it is legal to clobber flags by INSN and
16947 false otherwise. */
16950 ix86_ok_to_clobber_flags (rtx insn
)
16952 basic_block bb
= BLOCK_FOR_INSN (insn
);
16958 if (NONDEBUG_INSN_P (insn
))
16960 for (use
= DF_INSN_USES (insn
); *use
; use
++)
16961 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
16964 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
16968 if (insn
== BB_END (bb
))
16971 insn
= NEXT_INSN (insn
);
16974 live
= df_get_live_out(bb
);
16975 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
16978 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16979 move and add to avoid AGU stalls. */
16982 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
16984 unsigned int regno0
, regno1
, regno2
;
16986 /* Check if we need to optimize. */
16987 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
16990 /* Check it is correct to split here. */
16991 if (!ix86_ok_to_clobber_flags(insn
))
16994 regno0
= true_regnum (operands
[0]);
16995 regno1
= true_regnum (operands
[1]);
16996 regno2
= true_regnum (operands
[2]);
16998 /* We need to split only adds with non destructive
16999 destination operand. */
17000 if (regno0
== regno1
|| regno0
== regno2
)
17003 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1);
/* Return true if we should emit a lea instruction instead of a mov
   instruction.  */
17010 ix86_use_lea_for_mov (rtx insn
, rtx operands
[])
17012 unsigned int regno0
, regno1
;
17014 /* Check if we need to optimize. */
17015 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17018 /* Use lea for reg to reg moves only. */
17019 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
17022 regno0
= true_regnum (operands
[0]);
17023 regno1
= true_regnum (operands
[1]);
17025 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0);
17028 /* Return true if we need to split lea into a sequence of
17029 instructions to avoid AGU stalls. */
17032 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
17034 unsigned int regno0
, regno1
, regno2
;
17036 struct ix86_address parts
;
17039 /* FIXME: Handle zero-extended addresses. */
17040 if (GET_CODE (operands
[1]) == ZERO_EXTEND
17041 || GET_CODE (operands
[1]) == AND
)
17044 /* Check we need to optimize. */
17045 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17048 /* Check it is correct to split here. */
17049 if (!ix86_ok_to_clobber_flags(insn
))
17052 ok
= ix86_decompose_address (operands
[1], &parts
);
/* We should not split into an add if a non-legitimate PIC
   operand is used as the displacement.  */
17057 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
17060 regno0
= true_regnum (operands
[0]) ;
17061 regno1
= INVALID_REGNUM
;
17062 regno2
= INVALID_REGNUM
;
17065 regno1
= true_regnum (parts
.base
);
17067 regno2
= true_regnum (parts
.index
);
/* Compute how many cycles we will add to the execution time
   if we split the lea into a sequence of instructions.  */
17073 if (parts
.base
|| parts
.index
)
/* Have to use a mov instruction if the non-destructive
   destination form is used.  */
17077 if (regno1
!= regno0
&& regno2
!= regno0
)
17080 /* Have to add index to base if both exist. */
17081 if (parts
.base
&& parts
.index
)
17084 /* Have to use shift and adds if scale is 2 or greater. */
17085 if (parts
.scale
> 1)
17087 if (regno0
!= regno1
)
17089 else if (regno2
== regno0
)
17092 split_cost
+= parts
.scale
;
17095 /* Have to use add instruction with immediate if
17096 disp is non zero. */
17097 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17100 /* Subtract the price of lea. */
17104 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
);
17107 /* Emit x86 binary operand CODE in mode MODE, where the first operand
17108 matches destination. RTX includes clobber of FLAGS_REG. */
17111 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
17116 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
17117 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17119 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17122 /* Split lea instructions into a sequence of instructions
17123 which are executed on ALU to avoid AGU stalls.
17124 It is assumed that it is allowed to clobber flags register
17125 at lea position. */
17128 ix86_split_lea_for_addr (rtx operands
[], enum machine_mode mode
)
17130 unsigned int regno0
, regno1
, regno2
;
17131 struct ix86_address parts
;
17135 ok
= ix86_decompose_address (operands
[1], &parts
);
17138 target
= operands
[0];
17140 regno0
= true_regnum (target
);
17141 regno1
= INVALID_REGNUM
;
17142 regno2
= INVALID_REGNUM
;
17146 if (GET_MODE (parts
.base
) != mode
)
17147 parts
.base
= gen_lowpart (mode
, parts
.base
);
17148 regno1
= true_regnum (parts
.base
);
17153 if (GET_MODE (parts
.index
) != mode
)
17154 parts
.index
= gen_lowpart (mode
, parts
.index
);
17155 regno2
= true_regnum (parts
.index
);
17158 if (parts
.scale
> 1)
17160 /* Case r1 = r1 + ... */
17161 if (regno1
== regno0
)
17163 /* If we have a case r1 = r1 + C * r1 then we
17164 should use multiplication which is very
17165 expensive. Assume cost model is wrong if we
17166 have such case here. */
17167 gcc_assert (regno2
!= regno0
);
17169 for (adds
= parts
.scale
; adds
> 0; adds
--)
17170 ix86_emit_binop (PLUS
, mode
, target
, parts
.index
);
17174 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
17175 if (regno0
!= regno2
)
17176 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17178 /* Use shift for scaling. */
17179 ix86_emit_binop (ASHIFT
, mode
, target
,
17180 GEN_INT (exact_log2 (parts
.scale
)));
17183 ix86_emit_binop (PLUS
, mode
, target
, parts
.base
);
17185 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17186 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17189 else if (!parts
.base
&& !parts
.index
)
17191 gcc_assert(parts
.disp
);
17192 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.disp
));
17198 if (regno0
!= regno2
)
17199 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17201 else if (!parts
.index
)
17203 if (regno0
!= regno1
)
17204 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.base
));
17208 if (regno0
== regno1
)
17210 else if (regno0
== regno2
)
17214 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.base
));
17218 ix86_emit_binop (PLUS
, mode
, target
, tmp
);
17221 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17222 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
/* Return true if it is ok to optimize an ADD operation to a LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For processors like ATOM, if the
   destination register of the LEA holds an actual address which will
   be used soon, LEA is better, and otherwise ADD is better.  */
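/* Illustrative sketch of the trade-off (assuming the Atom pipeline model
   that motivates TARGET_OPT_AGU): lea is computed by the address
   generation unit, so a result consumed as a memory address a few insns
   later is already where the AGU wants it, while a result fed back into
   ordinary ALU arithmetic pays an AGU->ALU forwarding delay, making a
   plain add preferable in that case.  ix86_lea_outperforms compares the
   two distances to decide.  */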
17233 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
17235 unsigned int regno0
= true_regnum (operands
[0]);
17236 unsigned int regno1
= true_regnum (operands
[1]);
17237 unsigned int regno2
= true_regnum (operands
[2]);
17239 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17240 if (regno0
!= regno1
&& regno0
!= regno2
)
17243 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17246 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0);
/* Return true if the destination reg of SET_BODY is a shift count of
   USE_BODY.  */
17253 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
17259 /* Retrieve destination of SET_BODY. */
17260 switch (GET_CODE (set_body
))
17263 set_dest
= SET_DEST (set_body
);
17264 if (!set_dest
|| !REG_P (set_dest
))
17268 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
17269 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
17277 /* Retrieve shift count of USE_BODY. */
17278 switch (GET_CODE (use_body
))
17281 shift_rtx
= XEXP (use_body
, 1);
17284 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
17285 if (ix86_dep_by_shift_count_body (set_body
,
17286 XVECEXP (use_body
, 0, i
)))
17294 && (GET_CODE (shift_rtx
) == ASHIFT
17295 || GET_CODE (shift_rtx
) == LSHIFTRT
17296 || GET_CODE (shift_rtx
) == ASHIFTRT
17297 || GET_CODE (shift_rtx
) == ROTATE
17298 || GET_CODE (shift_rtx
) == ROTATERT
))
17300 rtx shift_count
= XEXP (shift_rtx
, 1);
17302 /* Return true if shift count is dest of SET_BODY. */
17303 if (REG_P (shift_count
)
17304 && true_regnum (set_dest
) == true_regnum (shift_count
))
/* Return true if the destination reg of SET_INSN is a shift count of
   USE_INSN.  */
17315 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
17317 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
17318 PATTERN (use_insn
));
17321 /* Return TRUE or FALSE depending on whether the unary operator meets the
17322 appropriate constraints. */
17325 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
17326 enum machine_mode mode ATTRIBUTE_UNUSED
,
17327 rtx operands
[2] ATTRIBUTE_UNUSED
)
17329 /* If one of operands is memory, source and destination must match. */
17330 if ((MEM_P (operands
[0])
17331 || MEM_P (operands
[1]))
17332 && ! rtx_equal_p (operands
[0], operands
[1]))
17337 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17338 are ok, keeping in mind the possible movddup alternative. */
17341 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
17343 if (MEM_P (operands
[0]))
17344 return rtx_equal_p (operands
[0], operands
[1 + high
]);
17345 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
17346 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
17350 /* Post-reload splitter for converting an SF or DFmode value in an
17351 SSE register into an unsigned SImode. */
17354 ix86_split_convert_uns_si_sse (rtx operands
[])
17356 enum machine_mode vecmode
;
17357 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
17359 large
= operands
[1];
17360 zero_or_two31
= operands
[2];
17361 input
= operands
[3];
17362 two31
= operands
[4];
17363 vecmode
= GET_MODE (large
);
17364 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
17366 /* Load up the value into the low element. We must ensure that the other
17367 elements are valid floats -- zero is the easiest such value. */
17370 if (vecmode
== V4SFmode
)
17371 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
17373 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
17377 input
= gen_rtx_REG (vecmode
, REGNO (input
));
17378 emit_move_insn (value
, CONST0_RTX (vecmode
));
17379 if (vecmode
== V4SFmode
)
17380 emit_insn (gen_sse_movss (value
, value
, input
));
17382 emit_insn (gen_sse2_movsd (value
, value
, input
));
17385 emit_move_insn (large
, two31
);
17386 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
17388 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
17389 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
17391 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
17392 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
17394 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
17395 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
17397 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
17398 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
17400 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
17401 if (vecmode
== V4SFmode
)
17402 emit_insn (gen_fix_truncv4sfv4si2 (x
, value
));
17404 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
17407 emit_insn (gen_xorv4si3 (value
, value
, large
));
17410 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17411 Expects the 64-bit DImode to be supplied in a pair of integral
17412 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17413 -mfpmath=sse, !optimize_size only. */
17416 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
17418 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
17419 rtx int_xmm
, fp_xmm
;
17420 rtx biases
, exponents
;
17423 int_xmm
= gen_reg_rtx (V4SImode
);
17424 if (TARGET_INTER_UNIT_MOVES
)
17425 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
17426 else if (TARGET_SSE_SPLIT_REGS
)
17428 emit_clobber (int_xmm
);
17429 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
17433 x
= gen_reg_rtx (V2DImode
);
17434 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
17435 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
17438 x
= gen_rtx_CONST_VECTOR (V4SImode
,
17439 gen_rtvec (4, GEN_INT (0x43300000UL
),
17440 GEN_INT (0x45300000UL
),
17441 const0_rtx
, const0_rtx
));
17442 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
17444 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17445 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
17447 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17448 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17449 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17450 (0x1.0p84 + double(fp_value_hi_xmm)).
17451 Note these exponents differ by 32. */
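/* Worked example of the bit pattern above: the double with bits
   0x4330000000000000 is 0x1.0p52 (biased exponent 0x433 = 1075,
   1075 - 1023 = 52), so placing a 32-bit value N in its low mantissa
   bits yields exactly 2^52 + N; with 0x45300000 the value becomes
   2^84 + N * 2^32.  After the bias subtraction below, the two lanes
   hold N and N * 2^32, and their sum is the original 64-bit input.  */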
17453 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
17455 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17456 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17457 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
17458 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
17459 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
17460 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
17461 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
17462 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
17463 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
17465 /* Add the upper and lower DFmode values together. */
17467 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
17470 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
17471 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
17472 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
17475 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
17478 /* Not used, but eases macroization of patterns. */
17480 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
17481 rtx input ATTRIBUTE_UNUSED
)
17483 gcc_unreachable ();
17486 /* Convert an unsigned SImode value into a DFmode. Only currently used
17487 for SSE, but applicable anywhere. */
17490 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
17492 REAL_VALUE_TYPE TWO31r
;
17495 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
17496 NULL
, 1, OPTAB_DIRECT
);
17498 fp
= gen_reg_rtx (DFmode
);
17499 emit_insn (gen_floatsidf2 (fp
, x
));
17501 real_ldexp (&TWO31r
, &dconst1
, 31);
17502 x
= const_double_from_real_value (TWO31r
, DFmode
);
17504 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
17506 emit_move_insn (target
, x
);
17509 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17510 32-bit mode; otherwise we have a direct convert instruction. */
17513 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
17515 REAL_VALUE_TYPE TWO32r
;
17516 rtx fp_lo
, fp_hi
, x
;
17518 fp_lo
= gen_reg_rtx (DFmode
);
17519 fp_hi
= gen_reg_rtx (DFmode
);
17521 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
17523 real_ldexp (&TWO32r
, &dconst1
, 32);
17524 x
= const_double_from_real_value (TWO32r
, DFmode
);
17525 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
17527 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
17529 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17532 emit_move_insn (target
, x
);
17535 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17536 For x86_32, -mfpmath=sse, !optimize_size only. */
17538 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
17540 REAL_VALUE_TYPE ONE16r
;
17541 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
17543 real_ldexp (&ONE16r
, &dconst1
, 16);
17544 x
= const_double_from_real_value (ONE16r
, SFmode
);
17545 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
17546 NULL
, 0, OPTAB_DIRECT
);
17547 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
17548 NULL
, 0, OPTAB_DIRECT
);
17549 fp_hi
= gen_reg_rtx (SFmode
);
17550 fp_lo
= gen_reg_rtx (SFmode
);
17551 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
17552 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
17553 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
17555 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17557 if (!rtx_equal_p (target
, fp_hi
))
17558 emit_move_insn (target
, fp_hi
);
17561 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17562 a vector of unsigned ints VAL to vector of floats TARGET. */
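/* The sequence below splits each 32-bit element into its 16-bit halves
   (an AND with 0xffff and a logical shift right by 16), converts both
   halves -- each fits exactly in the float mantissa -- and recombines
   them as high * 0x1.0p16 + low.  The multiply by 2^16 is exact, so
   only the final addition rounds.  */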
17565 ix86_expand_vector_convert_uns_vsivsf (rtx target
, rtx val
)
17568 REAL_VALUE_TYPE TWO16r
;
17569 enum machine_mode intmode
= GET_MODE (val
);
17570 enum machine_mode fltmode
= GET_MODE (target
);
17571 rtx (*cvt
) (rtx
, rtx
);
17573 if (intmode
== V4SImode
)
17574 cvt
= gen_floatv4siv4sf2
;
17576 cvt
= gen_floatv8siv8sf2
;
17577 tmp
[0] = ix86_build_const_vector (intmode
, 1, GEN_INT (0xffff));
17578 tmp
[0] = force_reg (intmode
, tmp
[0]);
17579 tmp
[1] = expand_simple_binop (intmode
, AND
, val
, tmp
[0], NULL_RTX
, 1,
17581 tmp
[2] = expand_simple_binop (intmode
, LSHIFTRT
, val
, GEN_INT (16),
17582 NULL_RTX
, 1, OPTAB_DIRECT
);
17583 tmp
[3] = gen_reg_rtx (fltmode
);
17584 emit_insn (cvt (tmp
[3], tmp
[1]));
17585 tmp
[4] = gen_reg_rtx (fltmode
);
17586 emit_insn (cvt (tmp
[4], tmp
[2]));
17587 real_ldexp (&TWO16r
, &dconst1
, 16);
17588 tmp
[5] = const_double_from_real_value (TWO16r
, SFmode
);
17589 tmp
[5] = force_reg (fltmode
, ix86_build_const_vector (fltmode
, 1, tmp
[5]));
17590 tmp
[6] = expand_simple_binop (fltmode
, MULT
, tmp
[4], tmp
[5], NULL_RTX
, 1,
17592 tmp
[7] = expand_simple_binop (fltmode
, PLUS
, tmp
[3], tmp
[6], target
, 1,
17594 if (tmp
[7] != target
)
17595 emit_move_insn (target
, tmp
[7]);
17598 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
17599 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
17600 This is done by doing just signed conversion if < 0x1p31, and otherwise by
17601 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
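/* Worked example: for a lane holding 3.0e9 (>= 0x1p31) the code below
   subtracts 0x1p31 to get 852516352.0, lets the signed fix_trunc
   pattern convert that, and records 0x80000000 in the XOR mask, since
   852516352 ^ 0x80000000 == 3000000000.  Lanes below 0x1p31 are left
   unchanged and get a zero XOR mask.  */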
17604 ix86_expand_adjust_ufix_to_sfix_si (rtx val
, rtx
*xorp
)
17606 REAL_VALUE_TYPE TWO31r
;
17607 rtx two31r
, tmp
[4];
17608 enum machine_mode mode
= GET_MODE (val
);
17609 enum machine_mode scalarmode
= GET_MODE_INNER (mode
);
17610 enum machine_mode intmode
= GET_MODE_SIZE (mode
) == 32 ? V8SImode
: V4SImode
;
17611 rtx (*cmp
) (rtx
, rtx
, rtx
, rtx
);
17614 for (i
= 0; i
< 3; i
++)
17615 tmp
[i
] = gen_reg_rtx (mode
);
17616 real_ldexp (&TWO31r
, &dconst1
, 31);
17617 two31r
= const_double_from_real_value (TWO31r
, scalarmode
);
17618 two31r
= ix86_build_const_vector (mode
, 1, two31r
);
17619 two31r
= force_reg (mode
, two31r
);
17622 case V8SFmode
: cmp
= gen_avx_maskcmpv8sf3
; break;
17623 case V4SFmode
: cmp
= gen_sse_maskcmpv4sf3
; break;
17624 case V4DFmode
: cmp
= gen_avx_maskcmpv4df3
; break;
17625 case V2DFmode
: cmp
= gen_sse2_maskcmpv2df3
; break;
17626 default: gcc_unreachable ();
17628 tmp
[3] = gen_rtx_LE (mode
, two31r
, val
);
17629 emit_insn (cmp (tmp
[0], two31r
, val
, tmp
[3]));
17630 tmp
[1] = expand_simple_binop (mode
, AND
, tmp
[0], two31r
, tmp
[1],
17632 if (intmode
== V4SImode
|| TARGET_AVX2
)
17633 *xorp
= expand_simple_binop (intmode
, ASHIFT
,
17634 gen_lowpart (intmode
, tmp
[0]),
17635 GEN_INT (31), NULL_RTX
, 0,
17639 rtx two31
= GEN_INT ((unsigned HOST_WIDE_INT
) 1 << 31);
17640 two31
= ix86_build_const_vector (intmode
, 1, two31
);
17641 *xorp
= expand_simple_binop (intmode
, AND
,
17642 gen_lowpart (intmode
, tmp
[0]),
17643 two31
, NULL_RTX
, 0,
17646 return expand_simple_binop (mode
, MINUS
, val
, tmp
[1], tmp
[2],
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */
17655 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
17659 enum machine_mode scalar_mode
;
17676 n_elt
= GET_MODE_NUNITS (mode
);
17677 v
= rtvec_alloc (n_elt
);
17678 scalar_mode
= GET_MODE_INNER (mode
);
17680 RTVEC_ELT (v
, 0) = value
;
17682 for (i
= 1; i
< n_elt
; ++i
)
17683 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
17685 return gen_rtx_CONST_VECTOR (mode
, v
);
17688 gcc_unreachable ();
17692 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17693 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17694 for an SSE register. If VECT is true, then replicate the mask for
17695 all elements of the vector register. If INVERT is true, then create
17696 a mask excluding the sign bit. */
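/* For SFmode, for instance, the mask element is 0x80000000, or
   0x7fffffff when INVERT is true.  Conceptually, negation flips the
   sign bit (an XOR with the former) and absolute value clears it (an
   AND with the latter); ix86_expand_fp_absneg_operator below builds
   the appropriate mask and attaches it to the NEG/ABS pattern.  */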
17699 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
17701 enum machine_mode vec_mode
, imode
;
17702 HOST_WIDE_INT hi
, lo
;
17707 /* Find the sign bit, sign extended to 2*HWI. */
17715 mode
= GET_MODE_INNER (mode
);
17717 lo
= 0x80000000, hi
= lo
< 0;
17725 mode
= GET_MODE_INNER (mode
);
17727 if (HOST_BITS_PER_WIDE_INT
>= 64)
17728 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
17730 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
17735 vec_mode
= VOIDmode
;
17736 if (HOST_BITS_PER_WIDE_INT
>= 64)
17739 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
17746 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
17750 lo
= ~lo
, hi
= ~hi
;
17756 mask
= immed_double_const (lo
, hi
, imode
);
17758 vec
= gen_rtvec (2, v
, mask
);
17759 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
17760 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
17767 gcc_unreachable ();
17771 lo
= ~lo
, hi
= ~hi
;
17773 /* Force this value into the low part of a fp vector constant. */
17774 mask
= immed_double_const (lo
, hi
, imode
);
17775 mask
= gen_lowpart (mode
, mask
);
17777 if (vec_mode
== VOIDmode
)
17778 return force_reg (mode
, mask
);
17780 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
17781 return force_reg (vec_mode
, v
);
17784 /* Generate code for floating point ABS or NEG. */
17787 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
17790 rtx mask
, set
, dst
, src
;
17791 bool use_sse
= false;
17792 bool vector_mode
= VECTOR_MODE_P (mode
);
17793 enum machine_mode vmode
= mode
;
17797 else if (mode
== TFmode
)
17799 else if (TARGET_SSE_MATH
)
17801 use_sse
= SSE_FLOAT_MODE_P (mode
);
17802 if (mode
== SFmode
)
17804 else if (mode
== DFmode
)
17808 /* NEG and ABS performed with SSE use bitwise mask operations.
17809 Create the appropriate mask now. */
17811 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
17818 set
= gen_rtx_fmt_e (code
, mode
, src
);
17819 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
17826 use
= gen_rtx_USE (VOIDmode
, mask
);
17828 par
= gen_rtvec (2, set
, use
);
17831 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17832 par
= gen_rtvec (3, set
, use
, clob
);
17834 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
17840 /* Expand a copysign operation. Special case operand 0 being a constant. */
17843 ix86_expand_copysign (rtx operands
[])
17845 enum machine_mode mode
, vmode
;
17846 rtx dest
, op0
, op1
, mask
, nmask
;
17848 dest
= operands
[0];
17852 mode
= GET_MODE (dest
);
17854 if (mode
== SFmode
)
17856 else if (mode
== DFmode
)
17861 if (GET_CODE (op0
) == CONST_DOUBLE
)
17863 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
17865 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
17866 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
17868 if (mode
== SFmode
|| mode
== DFmode
)
17870 if (op0
== CONST0_RTX (mode
))
17871 op0
= CONST0_RTX (vmode
);
17874 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
17876 op0
= force_reg (vmode
, v
);
17879 else if (op0
!= CONST0_RTX (mode
))
17880 op0
= force_reg (mode
, op0
);
17882 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
17884 if (mode
== SFmode
)
17885 copysign_insn
= gen_copysignsf3_const
;
17886 else if (mode
== DFmode
)
17887 copysign_insn
= gen_copysigndf3_const
;
17889 copysign_insn
= gen_copysigntf3_const
;
17891 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
17895 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
17897 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
17898 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
17900 if (mode
== SFmode
)
17901 copysign_insn
= gen_copysignsf3_var
;
17902 else if (mode
== DFmode
)
17903 copysign_insn
= gen_copysigndf3_var
;
17905 copysign_insn
= gen_copysigntf3_var
;
17907 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
17911 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17912 be a constant, and so has already been expanded into a vector constant. */
17915 ix86_split_copysign_const (rtx operands
[])
17917 enum machine_mode mode
, vmode
;
17918 rtx dest
, op0
, mask
, x
;
17920 dest
= operands
[0];
17922 mask
= operands
[3];
17924 mode
= GET_MODE (dest
);
17925 vmode
= GET_MODE (mask
);
17927 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
17928 x
= gen_rtx_AND (vmode
, dest
, mask
);
17929 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17931 if (op0
!= CONST0_RTX (vmode
))
17933 x
= gen_rtx_IOR (vmode
, dest
, op0
);
17934 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17938 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17939 so we have to do two masks. */
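/* The two masks implement the bit-level identity
     copysign (x, y) = (x & ~signmask) | (y & signmask):
   NMASK keeps the magnitude bits of the first source and MASK extracts
   the sign bit of the second, and the splitter below picks which
   register may be clobbered according to the matched constraint
   alternative.  */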
17942 ix86_split_copysign_var (rtx operands
[])
17944 enum machine_mode mode
, vmode
;
17945 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
17947 dest
= operands
[0];
17948 scratch
= operands
[1];
17951 nmask
= operands
[4];
17952 mask
= operands
[5];
17954 mode
= GET_MODE (dest
);
17955 vmode
= GET_MODE (mask
);
17957 if (rtx_equal_p (op0
, op1
))
17959 /* Shouldn't happen often (it's useless, obviously), but when it does
17960 we'd generate incorrect code if we continue below. */
17961 emit_move_insn (dest
, op0
);
17965 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
17967 gcc_assert (REGNO (op1
) == REGNO (scratch
));
17969 x
= gen_rtx_AND (vmode
, scratch
, mask
);
17970 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
17973 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
17974 x
= gen_rtx_NOT (vmode
, dest
);
17975 x
= gen_rtx_AND (vmode
, x
, op0
);
17976 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17980 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
17982 x
= gen_rtx_AND (vmode
, scratch
, mask
);
17984 else /* alternative 2,4 */
17986 gcc_assert (REGNO (mask
) == REGNO (scratch
));
17987 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
17988 x
= gen_rtx_AND (vmode
, scratch
, op1
);
17990 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
17992 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
17994 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
17995 x
= gen_rtx_AND (vmode
, dest
, nmask
);
17997 else /* alternative 3,4 */
17999 gcc_assert (REGNO (nmask
) == REGNO (dest
));
18001 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18002 x
= gen_rtx_AND (vmode
, dest
, op0
);
18004 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18007 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
18008 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18011 /* Return TRUE or FALSE depending on whether the first SET in INSN
18012 has source and destination with matching CC modes, and that the
18013 CC mode is at least as constrained as REQ_MODE. */
18016 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
18019 enum machine_mode set_mode
;
18021 set
= PATTERN (insn
);
18022 if (GET_CODE (set
) == PARALLEL
)
18023 set
= XVECEXP (set
, 0, 0);
18024 gcc_assert (GET_CODE (set
) == SET
);
18025 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
18027 set_mode
= GET_MODE (SET_DEST (set
));
18031 if (req_mode
!= CCNOmode
18032 && (req_mode
!= CCmode
18033 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
18037 if (req_mode
== CCGCmode
)
18041 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
18045 if (req_mode
== CCZmode
)
18055 if (set_mode
!= req_mode
)
18060 gcc_unreachable ();
18063 return GET_MODE (SET_SRC (set
)) == set_mode
;
18066 /* Generate insn patterns to do an integer compare of OPERANDS. */
18069 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18071 enum machine_mode cmpmode
;
18074 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
18075 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
18077 /* This is very simple, but making the interface the same as in the
18078 FP case makes the rest of the code easier. */
18079 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
18080 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
18082 /* Return the test that should be put into the flags user, i.e.
18083 the bcc, scc, or cmov instruction. */
18084 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
18087 /* Figure out whether to use ordered or unordered fp comparisons.
18088 Return the appropriate mode to use. */
18091 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
18093 /* ??? In order to make all comparisons reversible, we do all comparisons
18094 non-trapping when compiling for IEEE. Once gcc is able to distinguish
18095 all forms trapping and nontrapping comparisons, we can make inequality
18096 comparisons trapping again, since it results in better code when using
18097 FCOM based compares. */
18098 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
18102 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
18104 enum machine_mode mode
= GET_MODE (op0
);
18106 if (SCALAR_FLOAT_MODE_P (mode
))
18108 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
18109 return ix86_fp_compare_mode (code
);
18114 /* Only zero flag is needed. */
18115 case EQ
: /* ZF=0 */
18116 case NE
: /* ZF!=0 */
18118 /* Codes needing carry flag. */
18119 case GEU
: /* CF=0 */
18120 case LTU
: /* CF=1 */
18121 /* Detect overflow checks. They need just the carry flag. */
18122 if (GET_CODE (op0
) == PLUS
18123 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18127 case GTU
: /* CF=0 & ZF=0 */
18128 case LEU
: /* CF=1 | ZF=1 */
18129 /* Detect overflow checks. They need just the carry flag. */
18130 if (GET_CODE (op0
) == MINUS
18131 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18135 /* Codes possibly doable only with sign flag when
18136 comparing against zero. */
18137 case GE
: /* SF=OF or SF=0 */
18138 case LT
: /* SF<>OF or SF=1 */
18139 if (op1
== const0_rtx
)
18142 /* For other cases Carry flag is not required. */
18144 /* Codes doable only with sign flag when comparing
18145 against zero, but we miss jump instruction for it
18146 so we need to use relational tests against overflow
18147 that thus needs to be zero. */
18148 case GT
: /* ZF=0 & SF=OF */
18149 case LE
: /* ZF=1 | SF<>OF */
18150 if (op1
== const0_rtx
)
/* The strcmp pattern does (use flags), and combine may ask us for a proper
   mode.  */
18159 gcc_unreachable ();
18163 /* Return the fixed registers used for condition codes. */
18166 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
18173 /* If two condition code modes are compatible, return a condition code
18174 mode which is compatible with both. Otherwise, return
18177 static enum machine_mode
18178 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
18183 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
18186 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
18187 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
18190 if (m1
== CCZmode
&& (m2
== CCGCmode
|| m2
== CCGOCmode
))
18192 else if (m2
== CCZmode
&& (m1
== CCGCmode
|| m1
== CCGOCmode
))
18198 gcc_unreachable ();
/* These are only compatible with themselves, which we already
   checked.  */
/* Return a comparison we can do that is equivalent to
   swap_condition (code), apart possibly from orderedness.
   But never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */
18240 static enum rtx_code
18241 ix86_fp_swap_condition (enum rtx_code code
)
18245 case GT
: /* GTU - CF=0 & ZF=0 */
18246 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
18247 case GE
: /* GEU - CF=0 */
18248 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
18249 case UNLT
: /* LTU - CF=1 */
18250 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
18251 case UNLE
: /* LEU - CF=1 | ZF=1 */
18252 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
18254 return swap_condition (code
);
/* Return the cost of comparison CODE using the best strategy for performance.
   All following functions use the number of instructions as the cost metric.
   In the future this should be tweaked to compute bytes for optimize_size and
   take into account the performance of various instructions on various CPUs.  */
18264 ix86_fp_comparison_cost (enum rtx_code code
)
18268 /* The cost of code using bit-twiddling on %ah. */
18285 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
18289 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
18292 gcc_unreachable ();
18295 switch (ix86_fp_comparison_strategy (code
))
18297 case IX86_FPCMP_COMI
:
18298 return arith_cost
> 4 ? 3 : 2;
18299 case IX86_FPCMP_SAHF
:
18300 return arith_cost
> 4 ? 4 : 3;
/* Return the strategy to use for floating-point comparisons.  We assume that
   fcomi is always preferable where available, since that is also true when
   looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for
   fnstsw+test).  */
18310 enum ix86_fpcmp_strategy
18311 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
18313 /* Do fcomi/sahf based test when profitable. */
18316 return IX86_FPCMP_COMI
;
18318 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_function_for_size_p (cfun
)))
18319 return IX86_FPCMP_SAHF
;
18321 return IX86_FPCMP_ARITH
;
18324 /* Swap, force into registers, or otherwise massage the two operands
18325 to a fp comparison. The operands are updated in place; the new
18326 comparison code is returned. */
18328 static enum rtx_code
18329 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
18331 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
18332 rtx op0
= *pop0
, op1
= *pop1
;
18333 enum machine_mode op_mode
= GET_MODE (op0
);
18334 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
/* All of the unordered compare instructions only work on registers.
   The same is true of the fcomi compare instructions.  The XFmode
   compare instructions require registers except when comparing
   against zero or when converting operand 1 from fixed point to
   floating point.  */
18343 && (fpcmp_mode
== CCFPUmode
18344 || (op_mode
== XFmode
18345 && ! (standard_80387_constant_p (op0
) == 1
18346 || standard_80387_constant_p (op1
) == 1)
18347 && GET_CODE (op1
) != FLOAT
)
18348 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
18350 op0
= force_reg (op_mode
, op0
);
18351 op1
= force_reg (op_mode
, op1
);
18355 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18356 things around if they appear profitable, otherwise force op0
18357 into a register. */
18359 if (standard_80387_constant_p (op0
) == 0
18361 && ! (standard_80387_constant_p (op1
) == 0
18364 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
18365 if (new_code
!= UNKNOWN
)
18368 tmp
= op0
, op0
= op1
, op1
= tmp
;
18374 op0
= force_reg (op_mode
, op0
);
18376 if (CONSTANT_P (op1
))
18378 int tmp
= standard_80387_constant_p (op1
);
18380 op1
= validize_mem (force_const_mem (op_mode
, op1
));
18384 op1
= force_reg (op_mode
, op1
);
18387 op1
= force_reg (op_mode
, op1
);
18391 /* Try to rearrange the comparison to make it cheaper. */
18392 if (ix86_fp_comparison_cost (code
)
18393 > ix86_fp_comparison_cost (swap_condition (code
))
18394 && (REG_P (op1
) || can_create_pseudo_p ()))
18397 tmp
= op0
, op0
= op1
, op1
= tmp
;
18398 code
= swap_condition (code
);
18400 op0
= force_reg (op_mode
, op0
);
/* Convert the comparison codes we use to represent an FP comparison to the
   integer code that will result in a proper branch.  Return UNKNOWN if no
   such code is available.  */
18413 ix86_fp_compare_code_to_integer (enum rtx_code code
)
18442 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18445 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
18447 enum machine_mode fpcmp_mode
, intcmp_mode
;
18450 fpcmp_mode
= ix86_fp_compare_mode (code
);
18451 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
18453 /* Do fcomi/sahf based test when profitable. */
18454 switch (ix86_fp_comparison_strategy (code
))
18456 case IX86_FPCMP_COMI
:
18457 intcmp_mode
= fpcmp_mode
;
18458 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18459 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18464 case IX86_FPCMP_SAHF
:
18465 intcmp_mode
= fpcmp_mode
;
18466 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18467 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18471 scratch
= gen_reg_rtx (HImode
);
18472 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
18473 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
18476 case IX86_FPCMP_ARITH
:
18477 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18478 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18479 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
18481 scratch
= gen_reg_rtx (HImode
);
18482 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
18484 /* In the unordered case, we have to check C2 for NaN's, which
18485 doesn't happen to work out to anything nice combination-wise.
18486 So do some bit twiddling on the value we've got in AH to come
18487 up with an appropriate set of condition codes. */
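/* Key to the magic constants used below: in the high byte of the fnstsw
   result the x87 condition bits sit at C0 = 0x01, C2 = 0x04 and
   C3 = 0x40, so 0x45 tests C3|C2|C0, 0x44 is C3|C2, 0x05 is C2|C0 and
   0x40 is C3 alone.  C2 (and the full C3=C2=C0=1 pattern) is set only
   for unordered results, which is what the TARGET_IEEE_FP paths must
   account for.  */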
18489 intcmp_mode
= CCNOmode
;
18494 if (code
== GT
|| !TARGET_IEEE_FP
)
18496 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18501 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18502 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18503 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
18504 intcmp_mode
= CCmode
;
18510 if (code
== LT
&& TARGET_IEEE_FP
)
18512 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18513 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
18514 intcmp_mode
= CCmode
;
18519 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
18525 if (code
== GE
|| !TARGET_IEEE_FP
)
18527 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
18532 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18533 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
18539 if (code
== LE
&& TARGET_IEEE_FP
)
18541 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18542 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18543 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18544 intcmp_mode
= CCmode
;
18549 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18555 if (code
== EQ
&& TARGET_IEEE_FP
)
18557 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18558 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18559 intcmp_mode
= CCmode
;
18564 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18570 if (code
== NE
&& TARGET_IEEE_FP
)
18572 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18573 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
18579 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18585 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18589 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18594 gcc_unreachable ();
18602 /* Return the test that should be put into the flags user, i.e.
18603 the bcc, scc, or cmov instruction. */
18604 return gen_rtx_fmt_ee (code
, VOIDmode
,
18605 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
18610 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18614 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
18615 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
18617 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
18619 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
18620 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
18623 ret
= ix86_expand_int_compare (code
, op0
, op1
);
18629 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
18631 enum machine_mode mode
= GET_MODE (op0
);
18643 tmp
= ix86_expand_compare (code
, op0
, op1
);
18644 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
18645 gen_rtx_LABEL_REF (VOIDmode
, label
),
18647 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
18654 /* Expand DImode branch into multiple compare+branch. */
18656 rtx lo
[2], hi
[2], label2
;
18657 enum rtx_code code1
, code2
, code3
;
18658 enum machine_mode submode
;
18660 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
18662 tmp
= op0
, op0
= op1
, op1
= tmp
;
18663 code
= swap_condition (code
);
18666 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
18667 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
18669 submode
= mode
== DImode
? SImode
: DImode
;
18671 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18672 avoid two branches. This costs one extra insn, so disable when
18673 optimizing for size. */
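/* Spelled out: the two double-word values are equal exactly when both
   per-word XORs are zero, i.e. when (hi0^hi1)|(lo0^lo1) == 0, so a
   single compare of the ORed value against zero replaces the two
   compare-and-branch pairs.  */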
18675 if ((code
== EQ
|| code
== NE
)
18676 && (!optimize_insn_for_size_p ()
18677 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
18682 if (hi
[1] != const0_rtx
)
18683 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
18684 NULL_RTX
, 0, OPTAB_WIDEN
);
18687 if (lo
[1] != const0_rtx
)
18688 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
18689 NULL_RTX
, 0, OPTAB_WIDEN
);
18691 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
18692 NULL_RTX
, 0, OPTAB_WIDEN
);
18694 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
18698 /* Otherwise, if we are doing less-than or greater-or-equal-than,
18699 op1 is a constant and the low word is zero, then we can just
18700 examine the high word. Similarly for low word -1 and
18701 less-or-equal-than or greater-than. */
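/* Reasoning: with lo(op1) == 0, no unsigned low word can be below it,
   so for LT/GE-like codes the result is decided by the high words
   alone; symmetrically, with lo(op1) == -1 no low word can be above
   it, so LE/GT-like codes also reduce to the high-word comparison.  */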
18703 if (CONST_INT_P (hi
[1]))
18706 case LT
: case LTU
: case GE
: case GEU
:
18707 if (lo
[1] == const0_rtx
)
18709 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
18713 case LE
: case LEU
: case GT
: case GTU
:
18714 if (lo
[1] == constm1_rtx
)
18716 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
18724 /* Otherwise, we need two or three jumps. */
18726 label2
= gen_label_rtx ();
18729 code2
= swap_condition (code
);
18730 code3
= unsigned_condition (code
);
18734 case LT
: case GT
: case LTU
: case GTU
:
18737 case LE
: code1
= LT
; code2
= GT
; break;
18738 case GE
: code1
= GT
; code2
= LT
; break;
18739 case LEU
: code1
= LTU
; code2
= GTU
; break;
18740 case GEU
: code1
= GTU
; code2
= LTU
; break;
18742 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
18743 case NE
: code2
= UNKNOWN
; break;
18746 gcc_unreachable ();
18751 * if (hi(a) < hi(b)) goto true;
18752 * if (hi(a) > hi(b)) goto false;
18753 * if (lo(a) < lo(b)) goto true;
18757 if (code1
!= UNKNOWN
)
18758 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
18759 if (code2
!= UNKNOWN
)
18760 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
18762 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
18764 if (code2
!= UNKNOWN
)
18765 emit_label (label2
);
18770 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
18775 /* Split branch based on floating point condition. */
18777 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
18778 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
18783 if (target2
!= pc_rtx
)
18786 code
= reverse_condition_maybe_unordered (code
);
18791 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
18794 /* Remove pushed operand from stack. */
18796 ix86_free_from_memory (GET_MODE (pushed
));
18798 i
= emit_jump_insn (gen_rtx_SET
18800 gen_rtx_IF_THEN_ELSE (VOIDmode
,
18801 condition
, target1
, target2
)));
18802 if (split_branch_probability
>= 0)
18803 add_reg_note (i
, REG_BR_PROB
, GEN_INT (split_branch_probability
));
18807 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
18811 gcc_assert (GET_MODE (dest
) == QImode
);
18813 ret
= ix86_expand_compare (code
, op0
, op1
);
18814 PUT_MODE (ret
, QImode
);
18815 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
18818 /* Expand comparison setting or clearing carry flag. Return true when
18819 successful and set pop for the operation. */
18821 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
18823 enum machine_mode mode
=
18824 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
18826 /* Do not handle double-mode compares that go through special path. */
18827 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
18830 if (SCALAR_FLOAT_MODE_P (mode
))
18832 rtx compare_op
, compare_seq
;
18834 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
18836 /* Shortcut: following common codes never translate
18837 into carry flag compares. */
18838 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
18839 || code
== ORDERED
|| code
== UNORDERED
)
18842 /* These comparisons require zero flag; swap operands so they won't. */
18843 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
18844 && !TARGET_IEEE_FP
)
18849 code
= swap_condition (code
);
/* Try to expand the comparison and verify that we end up with a
   carry-flag-based comparison.  This fails to be true only when
   we decide to expand the comparison using arithmetic, which is
   not a common scenario.  */
18857 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
18858 compare_seq
= get_insns ();
18861 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
18862 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
18863 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
18865 code
= GET_CODE (compare_op
);
18867 if (code
!= LTU
&& code
!= GEU
)
18870 emit_insn (compare_seq
);
18875 if (!INTEGRAL_MODE_P (mode
))
18884 /* Convert a==0 into (unsigned)a<1. */
18887 if (op1
!= const0_rtx
)
18890 code
= (code
== EQ
? LTU
: GEU
);
18893 /* Convert a>b into b<a or a>=b-1. */
18896 if (CONST_INT_P (op1
))
18898 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
18899 /* Bail out on overflow. We still can swap operands but that
18900 would force loading of the constant into register. */
18901 if (op1
== const0_rtx
18902 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
18904 code
= (code
== GTU
? GEU
: LTU
);
18911 code
= (code
== GTU
? LTU
: GEU
);
18915 /* Convert a>=0 into (unsigned)a<0x80000000. */
18918 if (mode
== DImode
|| op1
!= const0_rtx
)
18920 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
18921 code
= (code
== LT
? GEU
: LTU
);
18925 if (mode
== DImode
|| op1
!= constm1_rtx
)
18927 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
18928 code
= (code
== LE
? GEU
: LTU
);
18934 /* Swapping operands may cause constant to appear as first operand. */
18935 if (!nonimmediate_operand (op0
, VOIDmode
))
18937 if (!can_create_pseudo_p ())
18939 op0
= force_reg (mode
, op0
);
18941 *pop
= ix86_expand_compare (code
, op0
, op1
);
18942 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
18947 ix86_expand_int_movcc (rtx operands
[])
18949 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
18950 rtx compare_seq
, compare_op
;
18951 enum machine_mode mode
= GET_MODE (operands
[0]);
18952 bool sign_bit_compare_p
= false;
18953 rtx op0
= XEXP (operands
[1], 0);
18954 rtx op1
= XEXP (operands
[1], 1);
18956 if (GET_MODE (op0
) == TImode
18957 || (GET_MODE (op0
) == DImode
18962 compare_op
= ix86_expand_compare (code
, op0
, op1
);
18963 compare_seq
= get_insns ();
18966 compare_code
= GET_CODE (compare_op
);
18968 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
18969 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
18970 sign_bit_compare_p
= true;
18972 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18973 HImode insns, we'd be swallowed in word prefix ops. */
18975 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
18976 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
18977 && CONST_INT_P (operands
[2])
18978 && CONST_INT_P (operands
[3]))
18980 rtx out
= operands
[0];
18981 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
18982 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
18983 HOST_WIDE_INT diff
;
/* Sign bit compares are better done using shifts than we do by using
   sbb.  */
18988 if (sign_bit_compare_p
18989 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
18991 /* Detect overlap between destination and compare sources. */
18994 if (!sign_bit_compare_p
)
18997 bool fpcmp
= false;
18999 compare_code
= GET_CODE (compare_op
);
19001 flags
= XEXP (compare_op
, 0);
19003 if (GET_MODE (flags
) == CCFPmode
19004 || GET_MODE (flags
) == CCFPUmode
)
19008 = ix86_fp_compare_code_to_integer (compare_code
);
19011 /* To simplify rest of code, restrict to the GEU case. */
19012 if (compare_code
== LTU
)
19014 HOST_WIDE_INT tmp
= ct
;
19017 compare_code
= reverse_condition (compare_code
);
19018 code
= reverse_condition (code
);
19023 PUT_CODE (compare_op
,
19024 reverse_condition_maybe_unordered
19025 (GET_CODE (compare_op
)));
19027 PUT_CODE (compare_op
,
19028 reverse_condition (GET_CODE (compare_op
)));
19032 if (reg_overlap_mentioned_p (out
, op0
)
19033 || reg_overlap_mentioned_p (out
, op1
))
19034 tmp
= gen_reg_rtx (mode
);
19036 if (mode
== DImode
)
19037 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
19039 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
19040 flags
, compare_op
));
19044 if (code
== GT
|| code
== GE
)
19045 code
= reverse_condition (code
);
19048 HOST_WIDE_INT tmp
= ct
;
19053 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
19066 tmp
= expand_simple_binop (mode
, PLUS
,
19068 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19079 tmp
= expand_simple_binop (mode
, IOR
,
19081 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19083 else if (diff
== -1 && ct
)
19093 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19095 tmp
= expand_simple_binop (mode
, PLUS
,
19096 copy_rtx (tmp
), GEN_INT (cf
),
19097 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19105 * andl cf - ct, dest
19115 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19118 tmp
= expand_simple_binop (mode
, AND
,
19120 gen_int_mode (cf
- ct
, mode
),
19121 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19123 tmp
= expand_simple_binop (mode
, PLUS
,
19124 copy_rtx (tmp
), GEN_INT (ct
),
19125 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19128 if (!rtx_equal_p (tmp
, out
))
19129 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
19136 enum machine_mode cmp_mode
= GET_MODE (op0
);
19139 tmp
= ct
, ct
= cf
, cf
= tmp
;
19142 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19144 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
/* We may be reversing an unordered compare to a normal compare, which
   is not valid in general (we may convert a non-trapping condition
   to a trapping one); however, on i386 we currently emit all
   comparisons unordered.  */
19150 compare_code
= reverse_condition_maybe_unordered (compare_code
);
19151 code
= reverse_condition_maybe_unordered (code
);
19155 compare_code
= reverse_condition (compare_code
);
19156 code
= reverse_condition (code
);
19160 compare_code
= UNKNOWN
;
19161 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
19162 && CONST_INT_P (op1
))
19164 if (op1
== const0_rtx
19165 && (code
== LT
|| code
== GE
))
19166 compare_code
= code
;
19167 else if (op1
== constm1_rtx
)
19171 else if (code
== GT
)
19176 /* Optimize dest = (op0 < 0) ? -1 : cf. */
19177 if (compare_code
!= UNKNOWN
19178 && GET_MODE (op0
) == GET_MODE (out
)
19179 && (cf
== -1 || ct
== -1))
19181 /* If lea code below could be used, only optimize
19182 if it results in a 2 insn sequence. */
19184 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19185 || diff
== 3 || diff
== 5 || diff
== 9)
19186 || (compare_code
== LT
&& ct
== -1)
19187 || (compare_code
== GE
&& cf
== -1))
19190 * notl op1 (if necessary)
19198 code
= reverse_condition (code
);
19201 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19203 out
= expand_simple_binop (mode
, IOR
,
19205 out
, 1, OPTAB_DIRECT
);
19206 if (out
!= operands
[0])
19207 emit_move_insn (operands
[0], out
);
19214 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19215 || diff
== 3 || diff
== 5 || diff
== 9)
19216 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
19218 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
19224 * lea cf(dest*(ct-cf)),dest
19228 * This also catches the degenerate setcc-only case.
19234 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
/* On x86_64 the lea instruction operates on Pmode, so we need
   to get the arithmetic done in the proper mode to match.  */
19240 tmp
= copy_rtx (out
);
19244 out1
= copy_rtx (out
);
19245 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
19249 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
19255 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
19258 if (!rtx_equal_p (tmp
, out
))
19261 out
= force_operand (tmp
, copy_rtx (out
));
19263 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
19265 if (!rtx_equal_p (out
, operands
[0]))
19266 emit_move_insn (operands
[0], copy_rtx (out
));
/*
 * General case:			Jumpful:
 *   xorl dest,dest		cmpl op1, op2
 *   cmpl op1, op2		movl ct, dest
 *   setcc dest			jcc 1f
 *   decl dest			movl cf, dest
 *   andl (cf-ct),dest		1:
 *   addl ct,dest
 *
 * Size 20.			Size 14.
 *
 * This is reasonably steep, but branch mispredict costs are
 * high on modern cpus, so consider failing only if optimizing
 * for space.
 */
19287 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19288 && BRANCH_COST (optimize_insn_for_speed_p (),
19293 enum machine_mode cmp_mode
= GET_MODE (op0
);
19298 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19300 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19302 /* We may be reversing unordered compare to normal compare,
19303 that is not valid in general (we may convert non-trapping
19304 condition to trapping one), however on i386 we currently
19305 emit all comparisons unordered. */
19306 code
= reverse_condition_maybe_unordered (code
);
19310 code
= reverse_condition (code
);
19311 if (compare_code
!= UNKNOWN
)
19312 compare_code
= reverse_condition (compare_code
);
19316 if (compare_code
!= UNKNOWN
)
19318 /* notl op1 (if needed)
19323 For x < 0 (resp. x <= -1) there will be no notl,
so if possible swap the constants to get rid of the complement.
19326 True/false will be -1/0 while code below (store flag
19327 followed by decrement) is 0/-1, so the constants need
19328 to be exchanged once more. */
19330 if (compare_code
== GE
|| !cf
)
19332 code
= reverse_condition (code
);
19337 HOST_WIDE_INT tmp
= cf
;
19342 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19346 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19348 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
19350 copy_rtx (out
), 1, OPTAB_DIRECT
);
19353 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
19354 gen_int_mode (cf
- ct
, mode
),
19355 copy_rtx (out
), 1, OPTAB_DIRECT
);
19357 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
19358 copy_rtx (out
), 1, OPTAB_DIRECT
);
19359 if (!rtx_equal_p (out
, operands
[0]))
19360 emit_move_insn (operands
[0], copy_rtx (out
));
19366 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19368 /* Try a few things more with specific constants and a variable. */
19371 rtx var
, orig_out
, out
, tmp
;
19373 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19376 /* If one of the two operands is an interesting constant, load a
19377 constant with the above and mask it in with a logical operation. */
19379 if (CONST_INT_P (operands
[2]))
19382 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
19383 operands
[3] = constm1_rtx
, op
= and_optab
;
19384 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
19385 operands
[3] = const0_rtx
, op
= ior_optab
;
19389 else if (CONST_INT_P (operands
[3]))
19392 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
19393 operands
[2] = constm1_rtx
, op
= and_optab
;
19394 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
19395 operands
[2] = const0_rtx
, op
= ior_optab
;
19402 orig_out
= operands
[0];
19403 tmp
= gen_reg_rtx (mode
);
19406 /* Recurse to get the constant loaded. */
19407 if (ix86_expand_int_movcc (operands
) == 0)
19410 /* Mask in the interesting variable. */
19411 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
19413 if (!rtx_equal_p (out
, orig_out
))
19414 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
19420 * For comparison with above,
19430 if (! nonimmediate_operand (operands
[2], mode
))
19431 operands
[2] = force_reg (mode
, operands
[2]);
19432 if (! nonimmediate_operand (operands
[3], mode
))
19433 operands
[3] = force_reg (mode
, operands
[3]);
19435 if (! register_operand (operands
[2], VOIDmode
)
19437 || ! register_operand (operands
[3], VOIDmode
)))
19438 operands
[2] = force_reg (mode
, operands
[2]);
19441 && ! register_operand (operands
[3], VOIDmode
))
19442 operands
[3] = force_reg (mode
, operands
[3]);
19444 emit_insn (compare_seq
);
19445 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
19446 gen_rtx_IF_THEN_ELSE (mode
,
19447 compare_op
, operands
[2],
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
				  rtx *pop0, rtx *pop1)

      /* AVX supports all the needed comparisons.  */

      /* We have no LTGT as an operator.  We could implement it with
	 NE & ORDERED, but this requires an extra temporary.  It's
	 not clear that it's worth it.  */

      /* These are supported directly.  */

      /* AVX has 3 operand comparisons, no need to swap anything.  */

      /* For commutative operators, try to canonicalize the destination
	 operand to be first in the comparison - this helps reload to
	 avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))

      /* These are not supported directly before AVX, and furthermore
	 ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
	 comparison operands to transform into something that is
	 supported.  */
      code = swap_condition (code);

      gcc_unreachable ();
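/* Illustrative note (not from the original sources): swapping works
   because a comparison and its operand-swapped form are mirror images,
   e.g. "a > b" is the same test as "b < a" and "a >= b" the same as
   "b <= a".  Pre-AVX cmpps/cmpss only provide the LT/LE/UNGT/UNGE
   style predicates directly, so GT/GE/UNLE/UNLT are handled by
   swapping *pop0 with *pop1 and running the condition through
   swap_condition.  */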
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
			   rtx cmp_op1, rtx if_true, rtx if_false)
  enum machine_mode mode;

  else if (code == UNGE)
      if_true = if_false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);

      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
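/* Illustrative note (not part of the original sources): operand order
   matters because minss/minps implement

       dest = (src1 < src2) ? src1 : src2

   so whenever either input is a NaN, or the inputs are -0.0 and +0.0,
   the second source operand is returned.  A conditional move written
   as "x < y ? x : y" maps exactly onto MIN with the operands kept in
   that order, while interchanging them would change the result for
   those inputs.  */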
/* Expand an sse vector comparison.  Return the register with the result.  */

ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
		     rtx op_true, rtx op_false)
  enum machine_mode mode = GET_MODE (dest);
  enum machine_mode cmp_mode = GET_MODE (cmp_op0);

  cmp_op0 = force_reg (cmp_mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, cmp_mode))
    cmp_op1 = force_reg (cmp_mode, cmp_op1);

      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
  if (cmp_mode != mode)
      x = force_reg (cmp_mode, x);
      convert_move (dest, x, false);

  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
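/* Illustrative note (not from the original sources): the comparison
   emitted here yields a per-element mask rather than a flags result,
   e.g. for V4SF

       cmpltps %xmm1, %xmm0   ; each lane of %xmm0 becomes all ones if it
                              ; was below the matching lane of %xmm1,
                              ; otherwise all zeros

   which is why the caller can consume the result with plain
   AND/ANDNOT/OR logic instead of a conditional branch.  */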
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
  enum machine_mode mode = GET_MODE (dest);

  if (vector_all_ones_operand (op_true, mode)
      && rtx_equal_p (op_false, CONST0_RTX (mode)))
      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));

  else if (op_false == CONST0_RTX (mode))
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  else if (op_true == CONST0_RTX (mode))
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
      op_false = force_reg (mode, op_false);
      x = gen_rtx_IOR (mode, cmp, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  else if (TARGET_XOP)
      op_true = force_reg (mode, op_true);

      if (!nonimmediate_operand (op_false, mode))
	op_false = force_reg (mode, op_false);

      emit_insn (gen_rtx_SET (mode, dest,
			      gen_rtx_IF_THEN_ELSE (mode, cmp,

      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;

      if (!nonimmediate_operand (op_true, mode))
	op_true = force_reg (mode, op_true);

      op_false = force_reg (mode, op_false);

	  gen = gen_sse4_1_blendvps;
	  gen = gen_sse4_1_blendvpd;
	  gen = gen_sse4_1_pblendvb;
	  dest = gen_lowpart (V16QImode, dest);
	  op_false = gen_lowpart (V16QImode, op_false);
	  op_true = gen_lowpart (V16QImode, op_true);
	  cmp = gen_lowpart (V16QImode, cmp);

	  gen = gen_avx_blendvps256;
	  gen = gen_avx_blendvpd256;
	  gen = gen_avx2_pblendvb;
	  dest = gen_lowpart (V32QImode, dest);
	  op_false = gen_lowpart (V32QImode, op_false);
	  op_true = gen_lowpart (V32QImode, op_true);
	  cmp = gen_lowpart (V32QImode, cmp);

	emit_insn (gen (dest, op_false, op_true, cmp));

      op_true = force_reg (mode, op_true);

      t2 = gen_reg_rtx (mode);
      t3 = gen_reg_rtx (mode);

      x = gen_rtx_AND (mode, op_true, cmp);
      emit_insn (gen_rtx_SET (VOIDmode, t2, x));

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, t3, x));

      x = gen_rtx_IOR (mode, t3, t2);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
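/* A minimal scalar model of the fallback blend above (an illustrative
   sketch added for this excerpt, not part of GCC): with CMP holding
   all-ones or all-zero bits per lane, the conditional move reduces to
   three bitwise operations, dest = (op_true & cmp) | (op_false & ~cmp).  */

static unsigned int
sse_movcc_lane_model (unsigned int cmp, unsigned int op_true,
		      unsigned int op_false)
{
  unsigned int t2 = op_true & cmp;	/* lanes where the condition held */
  unsigned int t3 = op_false & ~cmp;	/* lanes where it did not hold */
  return t2 | t3;			/* merged result */
}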
/* Expand a floating-point conditional move.  Return true if successful.  */

ix86_expand_fp_movcc (rtx operands[])
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
	 allocation just to gain access to it.  Deny movcc when the
	 comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
	cmode = GET_MODE (op1);

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
				     operands[2], operands[3]))

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
				 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
						operands[2], operands[3])));
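/* Illustrative note (not from the original sources): when the requested
   condition cannot be expressed by fcmov directly (e.g. a signed "less
   than"), the code above rewrites the predicate as "tmp != 0", roughly

       setl    %al          ; materialize the signed comparison as 0/1
       testb   %al, %al
       fcmovne ...          ; conditionally move on the NE condition

   since the fcmov family only understands the flag combinations set by
   unsigned-style comparisons.  */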
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

ix86_expand_fp_vcond (rtx operands[])
  enum rtx_code code = GET_CODE (operands[3]);

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					   &operands[4], &operands[5]);
  if (code == UNKNOWN)

      switch (GET_CODE (operands[3]))
	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
				      operands[5], operands[0], operands[0]);
	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
				     operands[5], operands[1], operands[2]);

	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
				      operands[5], operands[0], operands[0]);
	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
				     operands[5], operands[1], operands[2]);

	  gcc_unreachable ();

      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,

      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
				 operands[5], operands[1], operands[2]))

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
			     operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
/* Expand a signed/unsigned integral vector conditional move.  */

ix86_expand_int_vcond (rtx operands[])
  enum machine_mode data_mode = GET_MODE (operands[0]);
  enum machine_mode mode = GET_MODE (operands[4]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
  if ((code == LT || code == GE)
      && data_mode == mode
      && cop1 == CONST0_RTX (mode)
      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
      && (GET_MODE_SIZE (data_mode) == 16
	  || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
      rtx negop = operands[2 - (code == LT)];
      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
      if (negop == CONST1_RTX (data_mode))
	  rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
					 operands[0], 1, OPTAB_DIRECT);
	  if (res != operands[0])
	    emit_move_insn (operands[0], res);
      else if (GET_MODE_INNER (data_mode) != DImode
	       && vector_all_ones_operand (negop, data_mode))
	  rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
					 operands[0], 0, OPTAB_DIRECT);
	  if (res != operands[0])
	    emit_move_insn (operands[0], res);
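/* Illustrative note (not from the original sources): for 32-bit lanes
   the transformation above relies on

       x < 0 ? -1 : 0  ==  x >> 31              (arithmetic shift)
       x < 0 ?  1 : 0  ==  (unsigned) x >> 31   (logical shift)

   e.g. x = 0x80000004 gives 0xffffffff resp. 0x00000001, and any
   non-negative x gives 0, so no vector comparison instruction is
   needed at all.  */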
19886 if (!nonimmediate_operand (cop1
, mode
))
19887 cop1
= force_reg (mode
, cop1
);
19888 if (!general_operand (operands
[1], data_mode
))
19889 operands
[1] = force_reg (data_mode
, operands
[1]);
19890 if (!general_operand (operands
[2], data_mode
))
19891 operands
[2] = force_reg (data_mode
, operands
[2]);
19893 /* XOP supports all of the comparisons on all 128-bit vector int types. */
19895 && (mode
== V16QImode
|| mode
== V8HImode
19896 || mode
== V4SImode
|| mode
== V2DImode
))
19900 /* Canonicalize the comparison to EQ, GT, GTU. */
19911 code
= reverse_condition (code
);
19917 code
= reverse_condition (code
);
19923 code
= swap_condition (code
);
19924 x
= cop0
, cop0
= cop1
, cop1
= x
;
19928 gcc_unreachable ();
19931 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19932 if (mode
== V2DImode
)
19937 /* SSE4.1 supports EQ. */
19938 if (!TARGET_SSE4_1
)
19944 /* SSE4.2 supports GT/GTU. */
19945 if (!TARGET_SSE4_2
)
19950 gcc_unreachable ();
19954 /* Unsigned parallel compare is not supported by the hardware.
19955 Play some tricks to turn this into a signed comparison
19959 cop0
= force_reg (mode
, cop0
);
19969 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
19973 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
19974 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
19975 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
19976 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
19978 gcc_unreachable ();
19980 /* Subtract (-(INT MAX) - 1) from both operands to make
19982 mask
= ix86_build_signbit_mask (mode
, true, false);
19983 t1
= gen_reg_rtx (mode
);
19984 emit_insn (gen_sub3 (t1
, cop0
, mask
));
19986 t2
= gen_reg_rtx (mode
);
19987 emit_insn (gen_sub3 (t2
, cop1
, mask
));
19999 /* Perform a parallel unsigned saturating subtraction. */
20000 x
= gen_reg_rtx (mode
);
20001 emit_insn (gen_rtx_SET (VOIDmode
, x
,
20002 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
20005 cop1
= CONST0_RTX (mode
);
20011 gcc_unreachable ();
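/* Illustrative note (not from the original sources): SSE has no
   unsigned integer compares, so the two rewrites used above are

     1) bias both operands by the sign bit and compare signed:
          a >u b   is equivalent to   (a ^ 0x80000000) >s (b ^ 0x80000000)
        (subtracting INT_MIN and XOR-ing with it are the same thing in
        two's complement), and

     2) use an unsigned saturating subtract where available:
          a >u b   is equivalent to   (a -sat b) != 0
        since psubus yields zero exactly when a <= b.  */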
20016 /* Allow the comparison to be done in one mode, but the movcc to
20017 happen in another mode. */
20018 if (data_mode
== mode
)
20020 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
20021 operands
[1+negate
], operands
[2-negate
]);
20025 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
20026 x
= ix86_expand_sse_cmp (gen_lowpart (mode
, operands
[0]),
20028 operands
[1+negate
], operands
[2-negate
]);
20029 x
= gen_lowpart (data_mode
, x
);
20032 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
20033 operands
[2-negate
]);
20037 /* Expand a variable vector permutation. */
20040 ix86_expand_vec_perm (rtx operands
[])
20042 rtx target
= operands
[0];
20043 rtx op0
= operands
[1];
20044 rtx op1
= operands
[2];
20045 rtx mask
= operands
[3];
20046 rtx t1
, t2
, t3
, t4
, vt
, vt2
, vec
[32];
20047 enum machine_mode mode
= GET_MODE (op0
);
20048 enum machine_mode maskmode
= GET_MODE (mask
);
20050 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
20052 /* Number of elements in the vector. */
20053 w
= GET_MODE_NUNITS (mode
);
20054 e
= GET_MODE_UNIT_SIZE (mode
);
20055 gcc_assert (w
<= 32);
20059 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
20061 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
20062 an constant shuffle operand. With a tiny bit of effort we can
20063 use VPERMD instead. A re-interpretation stall for V4DFmode is
20064 unfortunate but there's no avoiding it.
20065 Similarly for V16HImode we don't have instructions for variable
20066 shuffling, while for V32QImode we can use after preparing suitable
20067 masks vpshufb; vpshufb; vpermq; vpor. */
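      /* Illustrative note (not from the original sources): the VPERMD
	 rewrite below doubles each 64-bit element index and expands it
	 into a pair of 32-bit indices, so a V4DI selector { 3 0 2 1 }
	 becomes the V8SI selector { 6 7 0 1 4 5 2 3 }, which picks up
	 the two halves of each requested quadword in order.  */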
20069 if (mode
== V16HImode
)
20071 maskmode
= mode
= V32QImode
;
20077 maskmode
= mode
= V8SImode
;
20081 t1
= gen_reg_rtx (maskmode
);
20083 /* Replicate the low bits of the V4DImode mask into V8SImode:
20085 t1 = { A A B B C C D D }. */
20086 for (i
= 0; i
< w
/ 2; ++i
)
20087 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
20088 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20089 vt
= force_reg (maskmode
, vt
);
20090 mask
= gen_lowpart (maskmode
, mask
);
20091 if (maskmode
== V8SImode
)
20092 emit_insn (gen_avx2_permvarv8si (t1
, mask
, vt
));
20094 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
20096 /* Multiply the shuffle indicies by two. */
20097 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
20100 /* Add one to the odd shuffle indicies:
20101 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
20102 for (i
= 0; i
< w
/ 2; ++i
)
20104 vec
[i
* 2] = const0_rtx
;
20105 vec
[i
* 2 + 1] = const1_rtx
;
20107 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20108 vt
= force_const_mem (maskmode
, vt
);
20109 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
20112 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
20113 operands
[3] = mask
= t1
;
20114 target
= gen_lowpart (mode
, target
);
20115 op0
= gen_lowpart (mode
, op0
);
20116 op1
= gen_lowpart (mode
, op1
);
20122 /* The VPERMD and VPERMPS instructions already properly ignore
20123 the high bits of the shuffle elements. No need for us to
20124 perform an AND ourselves. */
20125 if (one_operand_shuffle
)
20126 emit_insn (gen_avx2_permvarv8si (target
, op0
, mask
));
20129 t1
= gen_reg_rtx (V8SImode
);
20130 t2
= gen_reg_rtx (V8SImode
);
20131 emit_insn (gen_avx2_permvarv8si (t1
, op0
, mask
));
20132 emit_insn (gen_avx2_permvarv8si (t2
, op1
, mask
));
20138 mask
= gen_lowpart (V8SFmode
, mask
);
20139 if (one_operand_shuffle
)
20140 emit_insn (gen_avx2_permvarv8sf (target
, op0
, mask
));
20143 t1
= gen_reg_rtx (V8SFmode
);
20144 t2
= gen_reg_rtx (V8SFmode
);
20145 emit_insn (gen_avx2_permvarv8sf (t1
, op0
, mask
));
20146 emit_insn (gen_avx2_permvarv8sf (t2
, op1
, mask
));
20152 /* By combining the two 128-bit input vectors into one 256-bit
20153 input vector, we can use VPERMD and VPERMPS for the full
20154 two-operand shuffle. */
20155 t1
= gen_reg_rtx (V8SImode
);
20156 t2
= gen_reg_rtx (V8SImode
);
20157 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
20158 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20159 emit_insn (gen_avx2_permvarv8si (t1
, t1
, t2
));
20160 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
20164 t1
= gen_reg_rtx (V8SFmode
);
20165 t2
= gen_reg_rtx (V8SImode
);
20166 mask
= gen_lowpart (V4SImode
, mask
);
20167 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
20168 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20169 emit_insn (gen_avx2_permvarv8sf (t1
, t1
, t2
));
20170 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
20174 t1
= gen_reg_rtx (V32QImode
);
20175 t2
= gen_reg_rtx (V32QImode
);
20176 t3
= gen_reg_rtx (V32QImode
);
20177 vt2
= GEN_INT (128);
20178 for (i
= 0; i
< 32; i
++)
20180 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20181 vt
= force_reg (V32QImode
, vt
);
20182 for (i
= 0; i
< 32; i
++)
20183 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
20184 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20185 vt2
= force_reg (V32QImode
, vt2
);
20186 /* From mask create two adjusted masks, which contain the same
20187 bits as mask in the low 7 bits of each vector element.
20188 The first mask will have the most significant bit clear
20189 if it requests element from the same 128-bit lane
20190 and MSB set if it requests element from the other 128-bit lane.
20191 The second mask will have the opposite values of the MSB,
20192 and additionally will have its 128-bit lanes swapped.
20193 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
20194 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
20195 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
20196 stands for other 12 bytes. */
20197 /* The bit whether element is from the same lane or the other
20198 lane is bit 4, so shift it up by 3 to the MSB position. */
20199 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode
, t1
),
20200 gen_lowpart (V4DImode
, mask
),
20202 /* Clear MSB bits from the mask just in case it had them set. */
20203 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
20204 /* After this t1 will have MSB set for elements from other lane. */
20205 emit_insn (gen_xorv32qi3 (t1
, t1
, vt2
));
20206 /* Clear bits other than MSB. */
20207 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
20208 /* Or in the lower bits from mask into t3. */
20209 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
20210 /* And invert MSB bits in t1, so MSB is set for elements from the same
20212 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
20213 /* Swap 128-bit lanes in t3. */
20214 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20215 gen_lowpart (V4DImode
, t3
),
20216 const2_rtx
, GEN_INT (3),
20217 const0_rtx
, const1_rtx
));
20218 /* And or in the lower bits from mask into t1. */
20219 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
20220 if (one_operand_shuffle
)
20222 /* Each of these shuffles will put 0s in places where
20223 element from the other 128-bit lane is needed, otherwise
20224 will shuffle in the requested value. */
20225 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
, t3
));
20226 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
20227 /* For t3 the 128-bit lanes are swapped again. */
20228 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20229 gen_lowpart (V4DImode
, t3
),
20230 const2_rtx
, GEN_INT (3),
20231 const0_rtx
, const1_rtx
));
20232 /* And oring both together leads to the result. */
20233 emit_insn (gen_iorv32qi3 (target
, t1
, t3
));
20237 t4
= gen_reg_rtx (V32QImode
);
20238 /* Similarly to the above one_operand_shuffle code,
20239 just for repeated twice for each operand. merge_two:
20240 code will merge the two results together. */
20241 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
, t3
));
20242 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
, t3
));
20243 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
20244 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
20245 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t4
),
20246 gen_lowpart (V4DImode
, t4
),
20247 const2_rtx
, GEN_INT (3),
20248 const0_rtx
, const1_rtx
));
20249 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20250 gen_lowpart (V4DImode
, t3
),
20251 const2_rtx
, GEN_INT (3),
20252 const0_rtx
, const1_rtx
));
20253 emit_insn (gen_iorv32qi3 (t4
, t2
, t4
));
20254 emit_insn (gen_iorv32qi3 (t3
, t1
, t3
));
20260 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
20267 /* The XOP VPPERM insn supports three inputs. By ignoring the
20268 one_operand_shuffle special case, we avoid creating another
20269 set of constant vectors in memory. */
20270 one_operand_shuffle
= false;
20272 /* mask = mask & {2*w-1, ...} */
20273 vt
= GEN_INT (2*w
- 1);
20277 /* mask = mask & {w-1, ...} */
20278 vt
= GEN_INT (w
- 1);
20281 for (i
= 0; i
< w
; i
++)
20283 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20284 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20285 NULL_RTX
, 0, OPTAB_DIRECT
);
20287 /* For non-QImode operations, convert the word permutation control
20288 into a byte permutation control. */
20289 if (mode
!= V16QImode
)
20291 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
20292 GEN_INT (exact_log2 (e
)),
20293 NULL_RTX
, 0, OPTAB_DIRECT
);
20295 /* Convert mask to vector of chars. */
20296 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
20298 /* Replicate each of the input bytes into byte positions:
20299 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20300 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20301 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20302 for (i
= 0; i
< 16; ++i
)
20303 vec
[i
] = GEN_INT (i
/e
* e
);
20304 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20305 vt
= force_const_mem (V16QImode
, vt
);
20307 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
20309 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
20311 /* Convert it into the byte positions by doing
20312 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
20313 for (i
= 0; i
< 16; ++i
)
20314 vec
[i
] = GEN_INT (i
% e
);
20315 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20316 vt
= force_const_mem (V16QImode
, vt
);
20317 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
20320 /* The actual shuffle operations all operate on V16QImode. */
20321 op0
= gen_lowpart (V16QImode
, op0
);
20322 op1
= gen_lowpart (V16QImode
, op1
);
20323 target
= gen_lowpart (V16QImode
, target
);
20327 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
20329 else if (one_operand_shuffle
)
20331 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
20338 /* Shuffle the two input vectors independently. */
20339 t1
= gen_reg_rtx (V16QImode
);
20340 t2
= gen_reg_rtx (V16QImode
);
20341 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
20342 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
20345 /* Then merge them together. The key is whether any given control
20346 element contained a bit set that indicates the second word. */
20347 mask
= operands
[3];
20349 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
20351 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
20352 more shuffle to convert the V2DI input mask into a V4SI
20353 input mask. At which point the masking that expand_int_vcond
20354 will work as desired. */
20355 rtx t3
= gen_reg_rtx (V4SImode
);
20356 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
20357 const0_rtx
, const0_rtx
,
20358 const2_rtx
, const2_rtx
));
20360 maskmode
= V4SImode
;
20364 for (i
= 0; i
< w
; i
++)
20366 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20367 vt
= force_reg (maskmode
, vt
);
20368 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20369 NULL_RTX
, 0, OPTAB_DIRECT
);
20371 xops
[0] = gen_lowpart (mode
, operands
[0]);
20372 xops
[1] = gen_lowpart (mode
, t2
);
20373 xops
[2] = gen_lowpart (mode
, t1
);
20374 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
20377 ok
= ix86_expand_int_vcond (xops
);
20382 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20383 true if we should do zero extension, else sign extension. HIGH_P is
20384 true if we want the N/2 high elements, else the low elements. */
20387 ix86_expand_sse_unpack (rtx dest
, rtx src
, bool unsigned_p
, bool high_p
)
20389 enum machine_mode imode
= GET_MODE (src
);
20394 rtx (*unpack
)(rtx
, rtx
);
20395 rtx (*extract
)(rtx
, rtx
) = NULL
;
20396 enum machine_mode halfmode
= BLKmode
;
20402 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
20404 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
20405 halfmode
= V16QImode
;
20407 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
20411 unpack
= gen_avx2_zero_extendv8hiv8si2
;
20413 unpack
= gen_avx2_sign_extendv8hiv8si2
;
20414 halfmode
= V8HImode
;
20416 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
20420 unpack
= gen_avx2_zero_extendv4siv4di2
;
20422 unpack
= gen_avx2_sign_extendv4siv4di2
;
20423 halfmode
= V4SImode
;
20425 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
20429 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
20431 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
20435 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
20437 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
20441 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
20443 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
20446 gcc_unreachable ();
20449 if (GET_MODE_SIZE (imode
) == 32)
20451 tmp
= gen_reg_rtx (halfmode
);
20452 emit_insn (extract (tmp
, src
));
20456 /* Shift higher 8 bytes to lower 8 bytes. */
20457 tmp
= gen_reg_rtx (imode
);
20458 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, tmp
),
20459 gen_lowpart (V1TImode
, src
),
20465 emit_insn (unpack (dest
, tmp
));
20469 rtx (*unpack
)(rtx
, rtx
, rtx
);
20475 unpack
= gen_vec_interleave_highv16qi
;
20477 unpack
= gen_vec_interleave_lowv16qi
;
20481 unpack
= gen_vec_interleave_highv8hi
;
20483 unpack
= gen_vec_interleave_lowv8hi
;
20487 unpack
= gen_vec_interleave_highv4si
;
20489 unpack
= gen_vec_interleave_lowv4si
;
20492 gcc_unreachable ();
20496 tmp
= force_reg (imode
, CONST0_RTX (imode
));
20498 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
20499 src
, pc_rtx
, pc_rtx
);
20501 emit_insn (unpack (gen_lowpart (imode
, dest
), src
, tmp
));
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */

ix86_expand_int_addcc (rtx operands[])
  enum rtx_code code = GET_CODE (operands[1]);

  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);

  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)

  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))

  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
      code = ix86_fp_compare_code_to_integer (code);

	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		  (GET_CODE (compare_op)));
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
	  insn = gen_subqi3_carry;
	  insn = gen_subhi3_carry;
	  insn = gen_subsi3_carry;
	  insn = gen_subdi3_carry;
	  gcc_unreachable ();

	  insn = gen_addqi3_carry;
	  insn = gen_addhi3_carry;
	  insn = gen_addsi3_carry;
	  insn = gen_adddi3_carry;
	  gcc_unreachable ();

  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
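/* Illustrative note (not from the original sources): the point of the
   expansion above is that a conditional +1/-1 never needs a branch or
   a cmov once the comparison has been arranged to set the carry flag,
   e.g. for unsigned operands

       x += (a < b);
       =>  cmpl  %ebx, %eax
           adcl  $0, %ecx       ; add 0 plus the carry from the compare

   and "x -= (a < b)" uses sbb in the same way.  */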
20597 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
20598 but works for floating pointer parameters and nonoffsetable memories.
20599 For pushes, it returns just stack offsets; the values will be saved
20600 in the right order. Maximally three parts are generated. */
20603 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
20608 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
20610 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
20612 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
20613 gcc_assert (size
>= 2 && size
<= 4);
20615 /* Optimize constant pool reference to immediates. This is used by fp
20616 moves, that force all constants to memory to allow combining. */
20617 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
20619 rtx tmp
= maybe_get_pool_constant (operand
);
20624 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
20626 /* The only non-offsetable memories we handle are pushes. */
20627 int ok
= push_operand (operand
, VOIDmode
);
20631 operand
= copy_rtx (operand
);
20632 PUT_MODE (operand
, word_mode
);
20633 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
20637 if (GET_CODE (operand
) == CONST_VECTOR
)
20639 enum machine_mode imode
= int_mode_for_mode (mode
);
20640 /* Caution: if we looked through a constant pool memory above,
20641 the operand may actually have a different mode now. That's
20642 ok, since we want to pun this all the way back to an integer. */
20643 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
20644 gcc_assert (operand
!= NULL
);
20650 if (mode
== DImode
)
20651 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20656 if (REG_P (operand
))
20658 gcc_assert (reload_completed
);
20659 for (i
= 0; i
< size
; i
++)
20660 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
20662 else if (offsettable_memref_p (operand
))
20664 operand
= adjust_address (operand
, SImode
, 0);
20665 parts
[0] = operand
;
20666 for (i
= 1; i
< size
; i
++)
20667 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
20669 else if (GET_CODE (operand
) == CONST_DOUBLE
)
20674 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
20678 real_to_target (l
, &r
, mode
);
20679 parts
[3] = gen_int_mode (l
[3], SImode
);
20680 parts
[2] = gen_int_mode (l
[2], SImode
);
20683 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
20684 parts
[2] = gen_int_mode (l
[2], SImode
);
20687 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
20690 gcc_unreachable ();
20692 parts
[1] = gen_int_mode (l
[1], SImode
);
20693 parts
[0] = gen_int_mode (l
[0], SImode
);
20696 gcc_unreachable ();
20701 if (mode
== TImode
)
20702 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20703 if (mode
== XFmode
|| mode
== TFmode
)
20705 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
20706 if (REG_P (operand
))
20708 gcc_assert (reload_completed
);
20709 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
20710 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
20712 else if (offsettable_memref_p (operand
))
20714 operand
= adjust_address (operand
, DImode
, 0);
20715 parts
[0] = operand
;
20716 parts
[1] = adjust_address (operand
, upper_mode
, 8);
20718 else if (GET_CODE (operand
) == CONST_DOUBLE
)
20723 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
20724 real_to_target (l
, &r
, mode
);
20726 /* Do not use shift by 32 to avoid warning on 32bit systems. */
20727 if (HOST_BITS_PER_WIDE_INT
>= 64)
20730 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
20731 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
20734 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
20736 if (upper_mode
== SImode
)
20737 parts
[1] = gen_int_mode (l
[2], SImode
);
20738 else if (HOST_BITS_PER_WIDE_INT
>= 64)
20741 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
20742 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
20745 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
20748 gcc_unreachable ();
20755 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
20756 Return false when normal moves are needed; true when all required
20757 insns have been emitted. Operands 2-4 contain the input values
20758 int the correct order; operands 5-7 contain the output values. */
20761 ix86_split_long_move (rtx operands
[])
20766 int collisions
= 0;
20767 enum machine_mode mode
= GET_MODE (operands
[0]);
20768 bool collisionparts
[4];
20770 /* The DFmode expanders may ask us to move double.
20771 For 64bit target this is single move. By hiding the fact
20772 here we simplify i386.md splitters. */
20773 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
20775 /* Optimize constant pool reference to immediates. This is used by
20776 fp moves, that force all constants to memory to allow combining. */
20778 if (MEM_P (operands
[1])
20779 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
20780 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
20781 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
20782 if (push_operand (operands
[0], VOIDmode
))
20784 operands
[0] = copy_rtx (operands
[0]);
20785 PUT_MODE (operands
[0], word_mode
);
20788 operands
[0] = gen_lowpart (DImode
, operands
[0]);
20789 operands
[1] = gen_lowpart (DImode
, operands
[1]);
20790 emit_move_insn (operands
[0], operands
[1]);
20794 /* The only non-offsettable memory we handle is push. */
20795 if (push_operand (operands
[0], VOIDmode
))
20798 gcc_assert (!MEM_P (operands
[0])
20799 || offsettable_memref_p (operands
[0]));
20801 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
20802 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
20804 /* When emitting push, take care for source operands on the stack. */
20805 if (push
&& MEM_P (operands
[1])
20806 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
20808 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
20810 /* Compensate for the stack decrement by 4. */
20811 if (!TARGET_64BIT
&& nparts
== 3
20812 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
20813 src_base
= plus_constant (Pmode
, src_base
, 4);
20815 /* src_base refers to the stack pointer and is
20816 automatically decreased by emitted push. */
20817 for (i
= 0; i
< nparts
; i
++)
20818 part
[1][i
] = change_address (part
[1][i
],
20819 GET_MODE (part
[1][i
]), src_base
);
20822 /* We need to do copy in the right order in case an address register
20823 of the source overlaps the destination. */
20824 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
20828 for (i
= 0; i
< nparts
; i
++)
20831 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
20832 if (collisionparts
[i
])
20836 /* Collision in the middle part can be handled by reordering. */
20837 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
20839 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
20840 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
20842 else if (collisions
== 1
20844 && (collisionparts
[1] || collisionparts
[2]))
20846 if (collisionparts
[1])
20848 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
20849 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
20853 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
20854 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
20858 /* If there are more collisions, we can't handle it by reordering.
20859 Do an lea to the last part and use only one colliding move. */
20860 else if (collisions
> 1)
20866 base
= part
[0][nparts
- 1];
20868 /* Handle the case when the last part isn't valid for lea.
20869 Happens in 64-bit mode storing the 12-byte XFmode. */
20870 if (GET_MODE (base
) != Pmode
)
20871 base
= gen_rtx_REG (Pmode
, REGNO (base
));
20873 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
20874 part
[1][0] = replace_equiv_address (part
[1][0], base
);
20875 for (i
= 1; i
< nparts
; i
++)
20877 tmp
= plus_constant (Pmode
, base
, UNITS_PER_WORD
* i
);
20878 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
20889 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
20890 emit_insn (ix86_gen_add3 (stack_pointer_rtx
,
20891 stack_pointer_rtx
, GEN_INT (-4)));
20892 emit_move_insn (part
[0][2], part
[1][2]);
20894 else if (nparts
== 4)
20896 emit_move_insn (part
[0][3], part
[1][3]);
20897 emit_move_insn (part
[0][2], part
[1][2]);
20902 /* In 64bit mode we don't have 32bit push available. In case this is
20903 register, it is OK - we will just use larger counterpart. We also
20904 retype memory - these comes from attempt to avoid REX prefix on
20905 moving of second half of TFmode value. */
20906 if (GET_MODE (part
[1][1]) == SImode
)
20908 switch (GET_CODE (part
[1][1]))
20911 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
20915 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
20919 gcc_unreachable ();
20922 if (GET_MODE (part
[1][0]) == SImode
)
20923 part
[1][0] = part
[1][1];
20926 emit_move_insn (part
[0][1], part
[1][1]);
20927 emit_move_insn (part
[0][0], part
[1][0]);
20931 /* Choose correct order to not overwrite the source before it is copied. */
20932 if ((REG_P (part
[0][0])
20933 && REG_P (part
[1][1])
20934 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
20936 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
20938 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
20940 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
20942 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
20944 operands
[2 + i
] = part
[0][j
];
20945 operands
[6 + i
] = part
[1][j
];
20950 for (i
= 0; i
< nparts
; i
++)
20952 operands
[2 + i
] = part
[0][i
];
20953 operands
[6 + i
] = part
[1][i
];
20957 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
20958 if (optimize_insn_for_size_p ())
20960 for (j
= 0; j
< nparts
- 1; j
++)
20961 if (CONST_INT_P (operands
[6 + j
])
20962 && operands
[6 + j
] != const0_rtx
20963 && REG_P (operands
[2 + j
]))
20964 for (i
= j
; i
< nparts
- 1; i
++)
20965 if (CONST_INT_P (operands
[7 + i
])
20966 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
20967 operands
[7 + i
] = operands
[2 + j
];
20970 for (i
= 0; i
< nparts
; i
++)
20971 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
  rtx (*insn)(rtx, rtx, rtx);

      || (count * ix86_cost->add <= ix86_cost->shift_const
	  && !optimize_insn_for_size_p ())
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
	emit_insn (insn (operand, operand, operand));

      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
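/* Illustrative note (not from the original sources): each self-add
   doubles the value, so COUNT additions implement a left shift by
   COUNT, e.g. three "addl %eax, %eax" in a row compute x << 3.  This
   path is only taken when COUNT times the add cost is no more than one
   constant shift and we are optimizing for speed.  */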
21001 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21003 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
21004 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
21005 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21007 rtx low
[2], high
[2];
21010 if (CONST_INT_P (operands
[2]))
21012 split_double_mode (mode
, operands
, 2, low
, high
);
21013 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21015 if (count
>= half_width
)
21017 emit_move_insn (high
[0], low
[1]);
21018 emit_move_insn (low
[0], const0_rtx
);
21020 if (count
> half_width
)
21021 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
21025 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21027 if (!rtx_equal_p (operands
[0], operands
[1]))
21028 emit_move_insn (operands
[0], operands
[1]);
21030 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
21031 ix86_expand_ashl_const (low
[0], count
, mode
);
21036 split_double_mode (mode
, operands
, 1, low
, high
);
21038 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21040 if (operands
[1] == const1_rtx
)
21042 /* Assuming we've chosen a QImode capable registers, then 1 << N
21043 can be done with two 32/64-bit shifts, no branches, no cmoves. */
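	  /* Illustrative note (not from the original sources): for a
	     64-bit "1 << n" split into 32-bit halves, bit 5 of the
	     count decides which half receives the 1 and both halves are
	     then shifted by n & 31:

	         n = 40:  high = 1, low = 0, shift by 8 -> high = 0x100
	         n = 7:   high = 0, low = 1, shift by 7 -> low  = 0x80

	     so the full result needs only two setcc's and two 32-bit
	     shifts, with no branch on the count.  */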
21044 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
21046 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
21048 ix86_expand_clear (low
[0]);
21049 ix86_expand_clear (high
[0]);
21050 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
21052 d
= gen_lowpart (QImode
, low
[0]);
21053 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21054 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
21055 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21057 d
= gen_lowpart (QImode
, high
[0]);
21058 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21059 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
21060 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21063 /* Otherwise, we can get the same results by manually performing
21064 a bit extract operation on bit 5/6, and then performing the two
21065 shifts. The two methods of getting 0/1 into low/high are exactly
21066 the same size. Avoiding the shift in the bit extract case helps
21067 pentium4 a bit; no one else seems to care much either way. */
21070 enum machine_mode half_mode
;
21071 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
21072 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
21073 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
21074 HOST_WIDE_INT bits
;
21077 if (mode
== DImode
)
21079 half_mode
= SImode
;
21080 gen_lshr3
= gen_lshrsi3
;
21081 gen_and3
= gen_andsi3
;
21082 gen_xor3
= gen_xorsi3
;
21087 half_mode
= DImode
;
21088 gen_lshr3
= gen_lshrdi3
;
21089 gen_and3
= gen_anddi3
;
21090 gen_xor3
= gen_xordi3
;
21094 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
21095 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
21097 x
= gen_lowpart (half_mode
, operands
[2]);
21098 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
21100 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
21101 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
21102 emit_move_insn (low
[0], high
[0]);
21103 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
21106 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21107 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
21111 if (operands
[1] == constm1_rtx
)
21113 /* For -1 << N, we can avoid the shld instruction, because we
21114 know that we're shifting 0...31/63 ones into a -1. */
21115 emit_move_insn (low
[0], constm1_rtx
);
21116 if (optimize_insn_for_size_p ())
21117 emit_move_insn (high
[0], low
[0]);
21119 emit_move_insn (high
[0], constm1_rtx
);
21123 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21125 if (!rtx_equal_p (operands
[0], operands
[1]))
21126 emit_move_insn (operands
[0], operands
[1]);
21128 split_double_mode (mode
, operands
, 1, low
, high
);
21129 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
21132 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21134 if (TARGET_CMOVE
&& scratch
)
21136 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21137 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21139 ix86_expand_clear (scratch
);
21140 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
21144 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21145 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21147 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
21152 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21154 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
21155 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
21156 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21157 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21159 rtx low
[2], high
[2];
21162 if (CONST_INT_P (operands
[2]))
21164 split_double_mode (mode
, operands
, 2, low
, high
);
21165 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21167 if (count
== GET_MODE_BITSIZE (mode
) - 1)
21169 emit_move_insn (high
[0], high
[1]);
21170 emit_insn (gen_ashr3 (high
[0], high
[0],
21171 GEN_INT (half_width
- 1)));
21172 emit_move_insn (low
[0], high
[0]);
21175 else if (count
>= half_width
)
21177 emit_move_insn (low
[0], high
[1]);
21178 emit_move_insn (high
[0], low
[0]);
21179 emit_insn (gen_ashr3 (high
[0], high
[0],
21180 GEN_INT (half_width
- 1)));
21182 if (count
> half_width
)
21183 emit_insn (gen_ashr3 (low
[0], low
[0],
21184 GEN_INT (count
- half_width
)));
21188 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21190 if (!rtx_equal_p (operands
[0], operands
[1]))
21191 emit_move_insn (operands
[0], operands
[1]);
21193 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21194 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
21199 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21201 if (!rtx_equal_p (operands
[0], operands
[1]))
21202 emit_move_insn (operands
[0], operands
[1]);
21204 split_double_mode (mode
, operands
, 1, low
, high
);
21206 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21207 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
21209 if (TARGET_CMOVE
&& scratch
)
21211 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21212 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21214 emit_move_insn (scratch
, high
[0]);
21215 emit_insn (gen_ashr3 (scratch
, scratch
,
21216 GEN_INT (half_width
- 1)));
21217 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21222 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
21223 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
21225 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
21231 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21233 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
21234 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
21235 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21236 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21238 rtx low
[2], high
[2];
21241 if (CONST_INT_P (operands
[2]))
21243 split_double_mode (mode
, operands
, 2, low
, high
);
21244 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21246 if (count
>= half_width
)
21248 emit_move_insn (low
[0], high
[1]);
21249 ix86_expand_clear (high
[0]);
21251 if (count
> half_width
)
21252 emit_insn (gen_lshr3 (low
[0], low
[0],
21253 GEN_INT (count
- half_width
)));
21257 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21259 if (!rtx_equal_p (operands
[0], operands
[1]))
21260 emit_move_insn (operands
[0], operands
[1]);
21262 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21263 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
21268 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21270 if (!rtx_equal_p (operands
[0], operands
[1]))
21271 emit_move_insn (operands
[0], operands
[1]);
21273 split_double_mode (mode
, operands
, 1, low
, high
);
21275 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21276 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
21278 if (TARGET_CMOVE
&& scratch
)
21280 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21281 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21283 ix86_expand_clear (scratch
);
21284 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21289 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21290 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21292 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
/* Predict just emitted jump instruction to be taken with probability PROB.  */

predict_jump (int prob)
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));

/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If true, jump to the label.  */
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
    predict_jump (REG_BR_PROB_BASE * 90 / 100);

/* Adjust COUNTER by the VALUE.  */

ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));

/* Zero extend possibly SImode EXP to Pmode register.  */

ix86_zero_extend_to_Pmode (rtx exp)
  if (GET_MODE (exp) != Pmode)
    exp = convert_to_mode (Pmode, exp, 1);
  return force_reg (Pmode, exp);

/* Divide COUNTREG by SCALE.  */

scale_counter (rtx countreg, int scale)
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
			    GEN_INT (exact_log2 (scale)),
			    NULL, 1, OPTAB_DIRECT);
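/* Illustrative note (not from the original sources): scale_counter turns
   a byte count into a chunk count, so with scale == 4 a constant count
   of 37 becomes GEN_INT (9), and a register count is shifted right by
   exact_log2 (4) == 2; the leftover 37 & 3 == 1 bytes are dealt with by
   the epilogue code, not here.  */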
/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */

static enum machine_mode
counter_mode (rtx count_exp)
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
21378 /* When SRCPTR is non-NULL, output simple loop to move memory
21379 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
21380 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
21381 equivalent loop to set memory by VALUE (supposed to be in MODE).
21383 The size is rounded down to whole number of chunk size moved at once.
21384 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
21388 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
21389 rtx destptr
, rtx srcptr
, rtx value
,
21390 rtx count
, enum machine_mode mode
, int unroll
,
21393 rtx out_label
, top_label
, iter
, tmp
;
21394 enum machine_mode iter_mode
= counter_mode (count
);
21395 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
21396 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
21402 top_label
= gen_label_rtx ();
21403 out_label
= gen_label_rtx ();
21404 iter
= gen_reg_rtx (iter_mode
);
21406 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
21407 NULL
, 1, OPTAB_DIRECT
);
21408 /* Those two should combine. */
21409 if (piece_size
== const1_rtx
)
21411 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
21413 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
21415 emit_move_insn (iter
, const0_rtx
);
21417 emit_label (top_label
);
21419 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
21420 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
21421 destmem
= change_address (destmem
, mode
, x_addr
);
21425 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
21426 srcmem
= change_address (srcmem
, mode
, y_addr
);
21428 /* When unrolling for chips that reorder memory reads and writes,
21429 we can save registers by using single temporary.
21430 Also using 4 temporaries is overkill in 32bit mode. */
21431 if (!TARGET_64BIT
&& 0)
21433 for (i
= 0; i
< unroll
; i
++)
21438 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21440 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21442 emit_move_insn (destmem
, srcmem
);
21448 gcc_assert (unroll
<= 4);
21449 for (i
= 0; i
< unroll
; i
++)
21451 tmpreg
[i
] = gen_reg_rtx (mode
);
21455 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21457 emit_move_insn (tmpreg
[i
], srcmem
);
21459 for (i
= 0; i
< unroll
; i
++)
21464 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21466 emit_move_insn (destmem
, tmpreg
[i
]);
21471 for (i
= 0; i
< unroll
; i
++)
21475 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21476 emit_move_insn (destmem
, value
);
21479 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
21480 true, OPTAB_LIB_WIDEN
);
21482 emit_move_insn (iter
, tmp
);
21484 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
21486 if (expected_size
!= -1)
21488 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
21489 if (expected_size
== 0)
21491 else if (expected_size
> REG_BR_PROB_BASE
)
21492 predict_jump (REG_BR_PROB_BASE
- 1);
21494 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
21497 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
21498 iter
= ix86_zero_extend_to_Pmode (iter
);
21499 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
21500 true, OPTAB_LIB_WIDEN
);
21501 if (tmp
!= destptr
)
21502 emit_move_insn (destptr
, tmp
);
21505 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
21506 true, OPTAB_LIB_WIDEN
);
21508 emit_move_insn (srcptr
, tmp
);
21510 emit_label (out_label
);
21513 /* Output "rep; mov" instruction.
21514 Arguments have same meaning as for previous function */
21516 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
21517 rtx destptr
, rtx srcptr
,
21519 enum machine_mode mode
)
21524 HOST_WIDE_INT rounded_count
;
21526 /* If the size is known, it is shorter to use rep movs. */
21527 if (mode
== QImode
&& CONST_INT_P (count
)
21528 && !(INTVAL (count
) & 3))
21531 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21532 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21533 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
21534 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
21535 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21536 if (mode
!= QImode
)
21538 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21539 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21540 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21541 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21542 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21543 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
21547 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21548 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
21550 if (CONST_INT_P (count
))
21552 rounded_count
= (INTVAL (count
)
21553 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21554 destmem
= shallow_copy_rtx (destmem
);
21555 srcmem
= shallow_copy_rtx (srcmem
);
21556 set_mem_size (destmem
, rounded_count
);
21557 set_mem_size (srcmem
, rounded_count
);
21561 if (MEM_SIZE_KNOWN_P (destmem
))
21562 clear_mem_size (destmem
);
21563 if (MEM_SIZE_KNOWN_P (srcmem
))
21564 clear_mem_size (srcmem
);
21566 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
21570 /* Output "rep; stos" instruction.
21571 Arguments have same meaning as for previous function */
21573 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
21574 rtx count
, enum machine_mode mode
,
21579 HOST_WIDE_INT rounded_count
;
21581 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21582 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21583 value
= force_reg (mode
, gen_lowpart (mode
, value
));
21584 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21585 if (mode
!= QImode
)
21587 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21588 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21589 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21592 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21593 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
21595 rounded_count
= (INTVAL (count
)
21596 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21597 destmem
= shallow_copy_rtx (destmem
);
21598 set_mem_size (destmem
, rounded_count
);
21600 else if (MEM_SIZE_KNOWN_P (destmem
))
21601 clear_mem_size (destmem
);
21602 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
21606 emit_strmov (rtx destmem
, rtx srcmem
,
21607 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
21609 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
21610 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
21611 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
          else
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
          offset += 1;
        }
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
                                   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
                                     count, QImode, 1, 4);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.  */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          src = change_address (srcmem, HImode, srcptr);
          dest = change_address (destmem, HImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          src = change_address (srcmem, QImode, srcptr);
          dest = change_address (destmem, QImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, HImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, HImode, tmp);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, QImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, QImode, tmp);
          emit_move_insn (dest, src);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
}
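/* Illustrative note (not part of the original code): with a constant COUNT
   the epilogue above simply decodes the low bits of the count.  For example,
   count & 15 == 13 (binary 1101) on a 64-bit target emits one DImode move at
   offset 0, one SImode move at offset 8 and one QImode move at offset 12,
   assuming MAX_SIZE permits each chunk; no loop or branch is generated.  */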
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
                                 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
                         GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
                                 gen_lowpart (QImode, value), count, QImode,
                                 1, max_size / 4);
}
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            {
              dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
              emit_insn (gen_strset (destptr, dest, value));
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
          offset += 1;
        }
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count,
                        int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
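/* Illustrative note (not part of the original code): for ALIGN == 1 and
   DESIRED_ALIGNMENT == 8 the prologue above emits three conditional copies,
   each guarded by ix86_expand_aligntest on DESTPTR: a QImode move if bit 0 of
   the destination address is set, a HImode move if bit 1 is set and an
   SImode move if bit 2 is set, adjusting COUNT after each one.  At most
   DESIRED_ALIGNMENT - ALIGN bytes are copied, which is why the callers
   guarantee that the block is at least that large.  */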
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
                                 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx orig_dst = dst;
  rtx orig_src = src;
  int off = 0;
  int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
          && (src_align_bytes & 1) == (align_bytes & 1)
          && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
        set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
        {
          unsigned int src_align = 0;
          if ((src_align_bytes & 3) == (align_bytes & 3))
            src_align = 4;
          else if ((src_align_bytes & 1) == (align_bytes & 1))
            src_align = 2;
          if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
            set_mem_align (src, src_align * BITS_PER_UNIT);
        }
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
        src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
        src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
        src_align = 2;
      if (src_align > (unsigned int) desired_align)
        src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
        set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  if (MEM_SIZE_KNOWN_P (orig_src))
    set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
  *srcp = src;
  return dst;
}
/* Store enough into DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
                        int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Store enough into DST to align DST, known to be aligned by ALIGN, to
   DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
                                 int desired_align, int align_bytes)
{
  int off = 0;
  rtx orig_dst = dst;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  return dst;
}
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
            int *dynamic_check)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
                             || (memset
                                 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable                   \
                           || (alg != rep_prefix_1_byte        \
                               && alg != rep_prefix_4_byte     \
                               && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
          && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
        return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
        return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop, REP is expensive to
     setup.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          /* We get here if the algorithms that were not libcall-based
             were rep-prefix based and we are unable to use rep prefixes
             based on global register usage.  Break out of the loop and
             use the heuristic below.  */
          if (algs->size[i].max == 0)
            break;
          if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
            {
              enum stringop_alg candidate = algs->size[i].alg;

              if (candidate != libcall && ALG_USABLE_P (candidate))
                alg = candidate;
              /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
                 last non-libcall inline algorithm.  */
              if (TARGET_INLINE_ALL_STRINGOPS)
                {
                  /* When the current size is best to be copied by a libcall,
                     but we are still forced to inline, run the heuristic below
                     that will pick code for medium sized blocks.  */
                  if (alg != libcall)
                    return alg;
                  break;
                }
              else if (ALG_USABLE_P (candidate))
                return candidate;
            }
        }
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most of that size guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          enum stringop_alg candidate = algs->size[i].alg;
          any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

          if (candidate != libcall && candidate
              && ALG_USABLE_P (candidate))
            max = algs->size[i].max;
        }
      /* If there aren't any usable algorithms, then recursing on
         smaller sizes isn't going to find anything.  Just return the
         simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
        {
          /* Pick something reasonable.  */
          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
            *dynamic_check = 128;
          return loop_1_byte;
        }
      if (max == -1)
        max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
        *dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
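/* Illustrative sketch (not part of the original code): the size table walked
   above comes from the per-CPU cost tables.  The entry below is hypothetical
   and only shows the shape of such a table; real values live in the
   processor_costs structures elsewhere in this file.  */
#if 0
static const struct stringop_algs example_memcpy_algs
  = {libcall, {{24, loop}, {128, rep_prefix_4_byte}, {-1, libcall}}};
/* With this table, decide_alg picks rep_prefix_4_byte for expected_size 100
   (the first row whose max is >= 100), and falls back to unknown_size, here
   libcall, when the size is unknown and inlining is not forced.  */
#endif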
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
static int
decide_alignment (int align,
                  enum stringop_alg alg,
                  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
      case no_stringop:
        gcc_unreachable ();
      case loop:
      case unrolled_loop:
        desired_align = GET_MODE_SIZE (Pmode);
        break;
      case rep_prefix_8_byte:
        desired_align = 8;
        break;
      case rep_prefix_4_byte:
        /* PentiumPro has special logic triggering for 8 byte aligned blocks.
           copying whole cacheline at once.  */
        if (TARGET_PENTIUMPRO)
          desired_align = 8;
        else
          desired_align = 4;
        break;
      case rep_prefix_1_byte:
        /* PentiumPro has special logic triggering for 8 byte aligned blocks.
           copying whole cacheline at once.  */
        if (TARGET_PENTIUMPRO)
          desired_align = 8;
        else
          desired_align = 1;
        break;
      case loop_1_byte:
        desired_align = 1;
        break;
      case libcall:
        return 0;
    }

  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}

/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
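/* Illustrative note (not part of the original code): the helper above returns
   a strictly greater power of two, e.g. 5 -> 8, 8 -> 16 and 0 -> 1; it is
   used to round EPILOGUE_SIZE_NEEDED up so that the epilogue can mask the
   remaining count with a power-of-two bound.  */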
22284 /* Expand string move (memcpy) operation. Use i386 string operations
22285 when profitable. expand_setmem contains similar code. The code
22286 depends upon architecture, block size and alignment, but always has
22287 the same overall structure:
22289 1) Prologue guard: Conditional that jumps up to epilogues for small
22290 blocks that can be handled by epilogue alone. This is faster
22291 but also needed for correctness, since prologue assume the block
22292 is larger than the desired alignment.
22294 Optional dynamic check for size and libcall for large
22295 blocks is emitted here too, with -minline-stringops-dynamically.
22297 2) Prologue: copy first few bytes in order to get destination
22298 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
22299 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
22300 copied. We emit either a jump tree on power of two sized
22301 blocks, or a byte loop.
22303 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22304 with specified algorithm.
22306 4) Epilogue: code copying tail of the block that is too small to be
22307 handled by main body (or up to size guarded by prologue guard). */
22310 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
22311 rtx expected_align_exp
, rtx expected_size_exp
)
22317 rtx jump_around_label
= NULL
;
22318 HOST_WIDE_INT align
= 1;
22319 unsigned HOST_WIDE_INT count
= 0;
22320 HOST_WIDE_INT expected_size
= -1;
22321 int size_needed
= 0, epilogue_size_needed
;
22322 int desired_align
= 0, align_bytes
= 0;
22323 enum stringop_alg alg
;
22325 bool need_zero_guard
= false;
22327 if (CONST_INT_P (align_exp
))
22328 align
= INTVAL (align_exp
);
22329 /* i386 can do misaligned access on reasonably increased cost. */
22330 if (CONST_INT_P (expected_align_exp
)
22331 && INTVAL (expected_align_exp
) > align
)
22332 align
= INTVAL (expected_align_exp
);
22333 /* ALIGN is the minimum of destination and source alignment, but we care here
22334 just about destination alignment. */
22335 else if (MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
22336 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
22338 if (CONST_INT_P (count_exp
))
22339 count
= expected_size
= INTVAL (count_exp
);
22340 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22341 expected_size
= INTVAL (expected_size_exp
);
22343 /* Make sure we don't need to care about overflow later on. */
22344 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22347 /* Step 0: Decide on preferred algorithm, desired alignment and
22348 size of chunks to be copied by main loop. */
22350 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
22351 desired_align
= decide_alignment (align
, alg
, expected_size
);
22353 if (!TARGET_ALIGN_STRINGOPS
)
22354 align
= desired_align
;
22356 if (alg
== libcall
)
22358 gcc_assert (alg
!= no_stringop
);
22360 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
22361 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
22362 srcreg
= copy_addr_to_reg (XEXP (src
, 0));
22367 gcc_unreachable ();
22369 need_zero_guard
= true;
22370 size_needed
= GET_MODE_SIZE (word_mode
);
22372 case unrolled_loop
:
22373 need_zero_guard
= true;
22374 size_needed
= GET_MODE_SIZE (word_mode
) * (TARGET_64BIT
? 4 : 2);
22376 case rep_prefix_8_byte
:
22379 case rep_prefix_4_byte
:
22382 case rep_prefix_1_byte
:
22386 need_zero_guard
= true;
22391 epilogue_size_needed
= size_needed
;
22393 /* Step 1: Prologue guard. */
22395 /* Alignment code needs count to be in register. */
22396 if (CONST_INT_P (count_exp
) && desired_align
> align
)
22398 if (INTVAL (count_exp
) > desired_align
22399 && INTVAL (count_exp
) > size_needed
)
22402 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
22403 if (align_bytes
<= 0)
22406 align_bytes
= desired_align
- align_bytes
;
22408 if (align_bytes
== 0)
22409 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
22411 gcc_assert (desired_align
>= 1 && align
>= 1);
22413 /* Ensure that alignment prologue won't copy past end of block. */
22414 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
22416 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
22417 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
22418 Make sure it is power of 2. */
22419 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
22423 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
22425 /* If main algorithm works on QImode, no epilogue is needed.
22426 For small sizes just don't align anything. */
22427 if (size_needed
== 1)
22428 desired_align
= align
;
22435 label
= gen_label_rtx ();
22436 emit_cmp_and_jump_insns (count_exp
,
22437 GEN_INT (epilogue_size_needed
),
22438 LTU
, 0, counter_mode (count_exp
), 1, label
);
22439 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
22440 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22442 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22446 /* Emit code to decide on runtime whether library call or inline should be
22448 if (dynamic_check
!= -1)
22450 if (CONST_INT_P (count_exp
))
22452 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
22454 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22455 count_exp
= const0_rtx
;
22461 rtx hot_label
= gen_label_rtx ();
22462 jump_around_label
= gen_label_rtx ();
22463 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
22464 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
22465 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22466 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22467 emit_jump (jump_around_label
);
22468 emit_label (hot_label
);
22472 /* Step 2: Alignment prologue. */
22474 if (desired_align
> align
)
22476 if (align_bytes
== 0)
22478 /* Except for the first move in epilogue, we no longer know
22479 constant offset in aliasing info. It don't seems to worth
22480 the pain to maintain it for the first move, so throw away
22482 src
= change_address (src
, BLKmode
, srcreg
);
22483 dst
= change_address (dst
, BLKmode
, destreg
);
22484 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
22489 /* If we know how many bytes need to be stored before dst is
22490 sufficiently aligned, maintain aliasing info accurately. */
22491 dst
= expand_constant_movmem_prologue (dst
, &src
, destreg
, srcreg
,
22492 desired_align
, align_bytes
);
22493 count_exp
= plus_constant (counter_mode (count_exp
),
22494 count_exp
, -align_bytes
);
22495 count
-= align_bytes
;
22497 if (need_zero_guard
22498 && (count
< (unsigned HOST_WIDE_INT
) size_needed
22499 || (align_bytes
== 0
22500 && count
< ((unsigned HOST_WIDE_INT
) size_needed
22501 + desired_align
- align
))))
22503 /* It is possible that we copied enough so the main loop will not
22505 gcc_assert (size_needed
> 1);
22506 if (label
== NULL_RTX
)
22507 label
= gen_label_rtx ();
22508 emit_cmp_and_jump_insns (count_exp
,
22509 GEN_INT (size_needed
),
22510 LTU
, 0, counter_mode (count_exp
), 1, label
);
22511 if (expected_size
== -1
22512 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
22513 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22515 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22518 if (label
&& size_needed
== 1)
22520 emit_label (label
);
22521 LABEL_NUSES (label
) = 1;
22523 epilogue_size_needed
= 1;
22525 else if (label
== NULL_RTX
)
22526 epilogue_size_needed
= size_needed
;
22528 /* Step 3: Main loop. */
22534 gcc_unreachable ();
22536 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22537 count_exp
, QImode
, 1, expected_size
);
22540 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22541 count_exp
, word_mode
, 1, expected_size
);
22543 case unrolled_loop
:
22544 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
22545 registers for 4 temporaries anyway. */
22546 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22547 count_exp
, word_mode
, TARGET_64BIT
? 4 : 2,
22550 case rep_prefix_8_byte
:
22551 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22554 case rep_prefix_4_byte
:
22555 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22558 case rep_prefix_1_byte
:
22559 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22563 /* Adjust properly the offset of src and dest memory for aliasing. */
22564 if (CONST_INT_P (count_exp
))
22566 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
22567 (count
/ size_needed
) * size_needed
);
22568 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
22569 (count
/ size_needed
) * size_needed
);
22573 src
= change_address (src
, BLKmode
, srcreg
);
22574 dst
= change_address (dst
, BLKmode
, destreg
);
22577 /* Step 4: Epilogue to copy the remaining bytes. */
22581 /* When the main loop is done, COUNT_EXP might hold original count,
22582 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
22583 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
22584 bytes. Compensate if needed. */
22586 if (size_needed
< epilogue_size_needed
)
22589 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
22590 GEN_INT (size_needed
- 1), count_exp
, 1,
22592 if (tmp
!= count_exp
)
22593 emit_move_insn (count_exp
, tmp
);
22595 emit_label (label
);
22596 LABEL_NUSES (label
) = 1;
22599 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
22600 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
22601 epilogue_size_needed
);
22602 if (jump_around_label
)
22603 emit_label (jump_around_label
);
/* Helper function for memcpy.  For QImode value 0xXY produce
   0xXYXYXYXY of width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
        v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
          + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
                                  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
        if (mode == SImode)
          emit_insn (gen_movsi_insv_1 (reg, reg));
        else
          emit_insn (gen_movdi_insv_1 (reg, reg));
      else
        {
          tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
                                     NULL, 1, OPTAB_DIRECT);
          reg =
            expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
        }
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
        return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}
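/* Illustrative note (not part of the original code): for val == 0x41 and
   SImode the register path above computes
       reg = 0x41; reg |= reg << 8;   ->  0x4141
       reg |= reg << 16;              ->  0x41414141
   which is the same result as multiplying by 0x01010101; the cost comparison
   decides which of the two sequences is cheaper on the current CPU.  */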
/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
   be needed by main loop copying SIZE_NEEDED chunks and prologue getting
   alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}
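/* Illustrative note (not part of the original code): on a 64-bit target with
   size_needed == 8 the value is promoted to DImode; with size_needed == 2 and
   no extra alignment work (desired_align == align) only a HImode promotion is
   produced, keeping the promoted constant as narrow as the main loop and
   prologue actually require.  */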
22700 /* Expand string clear operation (bzero). Use i386 string operations when
22701 profitable. See expand_movmem comment for explanation of individual
22702 steps performed. */
22704 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
22705 rtx expected_align_exp
, rtx expected_size_exp
)
22710 rtx jump_around_label
= NULL
;
22711 HOST_WIDE_INT align
= 1;
22712 unsigned HOST_WIDE_INT count
= 0;
22713 HOST_WIDE_INT expected_size
= -1;
22714 int size_needed
= 0, epilogue_size_needed
;
22715 int desired_align
= 0, align_bytes
= 0;
22716 enum stringop_alg alg
;
22717 rtx promoted_val
= NULL
;
22718 bool force_loopy_epilogue
= false;
22720 bool need_zero_guard
= false;
22722 if (CONST_INT_P (align_exp
))
22723 align
= INTVAL (align_exp
);
22724 /* i386 can do misaligned access on reasonably increased cost. */
22725 if (CONST_INT_P (expected_align_exp
)
22726 && INTVAL (expected_align_exp
) > align
)
22727 align
= INTVAL (expected_align_exp
);
22728 if (CONST_INT_P (count_exp
))
22729 count
= expected_size
= INTVAL (count_exp
);
22730 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22731 expected_size
= INTVAL (expected_size_exp
);
22733 /* Make sure we don't need to care about overflow later on. */
22734 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22737 /* Step 0: Decide on preferred algorithm, desired alignment and
22738 size of chunks to be copied by main loop. */
22740 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
22741 desired_align
= decide_alignment (align
, alg
, expected_size
);
22743 if (!TARGET_ALIGN_STRINGOPS
)
22744 align
= desired_align
;
22746 if (alg
== libcall
)
22748 gcc_assert (alg
!= no_stringop
);
22750 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
22751 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
22756 gcc_unreachable ();
22758 need_zero_guard
= true;
22759 size_needed
= GET_MODE_SIZE (word_mode
);
22761 case unrolled_loop
:
22762 need_zero_guard
= true;
22763 size_needed
= GET_MODE_SIZE (word_mode
) * 4;
22765 case rep_prefix_8_byte
:
22768 case rep_prefix_4_byte
:
22771 case rep_prefix_1_byte
:
22775 need_zero_guard
= true;
22779 epilogue_size_needed
= size_needed
;
22781 /* Step 1: Prologue guard. */
22783 /* Alignment code needs count to be in register. */
22784 if (CONST_INT_P (count_exp
) && desired_align
> align
)
22786 if (INTVAL (count_exp
) > desired_align
22787 && INTVAL (count_exp
) > size_needed
)
22790 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
22791 if (align_bytes
<= 0)
22794 align_bytes
= desired_align
- align_bytes
;
22796 if (align_bytes
== 0)
22798 enum machine_mode mode
= SImode
;
22799 if (TARGET_64BIT
&& (count
& ~0xffffffff))
22801 count_exp
= force_reg (mode
, count_exp
);
22804 /* Do the cheap promotion to allow better CSE across the
22805 main loop and epilogue (ie one load of the big constant in the
22806 front of all code. */
22807 if (CONST_INT_P (val_exp
))
22808 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
22809 desired_align
, align
);
22810 /* Ensure that alignment prologue won't copy past end of block. */
22811 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
22813 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
22814 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
22815 Make sure it is power of 2. */
22816 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
22818 /* To improve performance of small blocks, we jump around the VAL
22819 promoting mode. This mean that if the promoted VAL is not constant,
22820 we might not use it in the epilogue and have to use byte
22822 if (epilogue_size_needed
> 2 && !promoted_val
)
22823 force_loopy_epilogue
= true;
22826 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
22828 /* If main algorithm works on QImode, no epilogue is needed.
22829 For small sizes just don't align anything. */
22830 if (size_needed
== 1)
22831 desired_align
= align
;
22838 label
= gen_label_rtx ();
22839 emit_cmp_and_jump_insns (count_exp
,
22840 GEN_INT (epilogue_size_needed
),
22841 LTU
, 0, counter_mode (count_exp
), 1, label
);
22842 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
22843 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22845 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22848 if (dynamic_check
!= -1)
22850 rtx hot_label
= gen_label_rtx ();
22851 jump_around_label
= gen_label_rtx ();
22852 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
22853 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
22854 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22855 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
22856 emit_jump (jump_around_label
);
22857 emit_label (hot_label
);
22860 /* Step 2: Alignment prologue. */
22862 /* Do the expensive promotion once we branched off the small blocks. */
22864 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
22865 desired_align
, align
);
22866 gcc_assert (desired_align
>= 1 && align
>= 1);
22868 if (desired_align
> align
)
22870 if (align_bytes
== 0)
22872 /* Except for the first move in epilogue, we no longer know
22873 constant offset in aliasing info. It don't seems to worth
22874 the pain to maintain it for the first move, so throw away
22876 dst
= change_address (dst
, BLKmode
, destreg
);
22877 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
22882 /* If we know how many bytes need to be stored before dst is
22883 sufficiently aligned, maintain aliasing info accurately. */
22884 dst
= expand_constant_setmem_prologue (dst
, destreg
, promoted_val
,
22885 desired_align
, align_bytes
);
22886 count_exp
= plus_constant (counter_mode (count_exp
),
22887 count_exp
, -align_bytes
);
22888 count
-= align_bytes
;
22890 if (need_zero_guard
22891 && (count
< (unsigned HOST_WIDE_INT
) size_needed
22892 || (align_bytes
== 0
22893 && count
< ((unsigned HOST_WIDE_INT
) size_needed
22894 + desired_align
- align
))))
22896 /* It is possible that we copied enough so the main loop will not
22898 gcc_assert (size_needed
> 1);
22899 if (label
== NULL_RTX
)
22900 label
= gen_label_rtx ();
22901 emit_cmp_and_jump_insns (count_exp
,
22902 GEN_INT (size_needed
),
22903 LTU
, 0, counter_mode (count_exp
), 1, label
);
22904 if (expected_size
== -1
22905 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
22906 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22908 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22911 if (label
&& size_needed
== 1)
22913 emit_label (label
);
22914 LABEL_NUSES (label
) = 1;
22916 promoted_val
= val_exp
;
22917 epilogue_size_needed
= 1;
22919 else if (label
== NULL_RTX
)
22920 epilogue_size_needed
= size_needed
;
22922 /* Step 3: Main loop. */
22928 gcc_unreachable ();
22930 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
22931 count_exp
, QImode
, 1, expected_size
);
22934 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
22935 count_exp
, word_mode
, 1, expected_size
);
22937 case unrolled_loop
:
22938 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
22939 count_exp
, word_mode
, 4, expected_size
);
22941 case rep_prefix_8_byte
:
22942 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
22945 case rep_prefix_4_byte
:
22946 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
22949 case rep_prefix_1_byte
:
22950 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
22954 /* Adjust properly the offset of src and dest memory for aliasing. */
22955 if (CONST_INT_P (count_exp
))
22956 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
22957 (count
/ size_needed
) * size_needed
);
22959 dst
= change_address (dst
, BLKmode
, destreg
);
22961 /* Step 4: Epilogue to copy the remaining bytes. */
22965 /* When the main loop is done, COUNT_EXP might hold original count,
22966 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
22967 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
22968 bytes. Compensate if needed. */
22970 if (size_needed
< epilogue_size_needed
)
22973 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
22974 GEN_INT (size_needed
- 1), count_exp
, 1,
22976 if (tmp
!= count_exp
)
22977 emit_move_insn (count_exp
, tmp
);
22979 emit_label (label
);
22980 LABEL_NUSES (label
) = 1;
22983 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
22985 if (force_loopy_epilogue
)
22986 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
22987 epilogue_size_needed
);
22989 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
22990 epilogue_size_needed
);
22992 if (jump_around_label
)
22993 emit_label (jump_around_label
);
22997 /* Expand the appropriate insns for doing strlen if not just doing
23000 out = result, initialized with the start address
23001 align_rtx = alignment of the address.
23002 scratch = scratch register, initialized with the startaddress when
23003 not aligned, otherwise undefined
23005 This is just the body. It needs the initializations mentioned above and
23006 some address computing at the end. These things are done in i386.md. */
23009 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
23013 rtx align_2_label
= NULL_RTX
;
23014 rtx align_3_label
= NULL_RTX
;
23015 rtx align_4_label
= gen_label_rtx ();
23016 rtx end_0_label
= gen_label_rtx ();
23018 rtx tmpreg
= gen_reg_rtx (SImode
);
23019 rtx scratch
= gen_reg_rtx (SImode
);
23023 if (CONST_INT_P (align_rtx
))
23024 align
= INTVAL (align_rtx
);
23026 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
23028 /* Is there a known alignment and is it less than 4? */
23031 rtx scratch1
= gen_reg_rtx (Pmode
);
23032 emit_move_insn (scratch1
, out
);
23033 /* Is there a known alignment and is it not 2? */
23036 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
23037 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
23039 /* Leave just the 3 lower bits. */
23040 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
23041 NULL_RTX
, 0, OPTAB_WIDEN
);
23043 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23044 Pmode
, 1, align_4_label
);
23045 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
23046 Pmode
, 1, align_2_label
);
23047 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
23048 Pmode
, 1, align_3_label
);
23052 /* Since the alignment is 2, we have to check 2 or 0 bytes;
23053 check if is aligned to 4 - byte. */
23055 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
23056 NULL_RTX
, 0, OPTAB_WIDEN
);
23058 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23059 Pmode
, 1, align_4_label
);
23062 mem
= change_address (src
, QImode
, out
);
23064 /* Now compare the bytes. */
23066 /* Compare the first n unaligned byte on a byte per byte basis. */
23067 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
23068 QImode
, 1, end_0_label
);
23070 /* Increment the address. */
23071 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23073 /* Not needed with an alignment of 2 */
23076 emit_label (align_2_label
);
23078 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23081 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23083 emit_label (align_3_label
);
23086 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23089 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23092 /* Generate loop to check 4 bytes at a time. It is not a good idea to
23093 align this loop. It gives only huge programs, but does not help to
23095 emit_label (align_4_label
);
23097 mem
= change_address (src
, SImode
, out
);
23098 emit_move_insn (scratch
, mem
);
23099 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
23101 /* This formula yields a nonzero result iff one of the bytes is zero.
23102 This saves three branches inside loop and many cycles. */
23104 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
23105 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
23106 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
23107 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
23108 gen_int_mode (0x80808080, SImode
)));
23109 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
23114 rtx reg
= gen_reg_rtx (SImode
);
23115 rtx reg2
= gen_reg_rtx (Pmode
);
23116 emit_move_insn (reg
, tmpreg
);
23117 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
23119 /* If zero is not in the first two bytes, move two bytes forward. */
23120 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23121 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23122 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23123 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
23124 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
23127 /* Emit lea manually to avoid clobbering of flags. */
23128 emit_insn (gen_rtx_SET (SImode
, reg2
,
23129 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
23131 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23132 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23133 emit_insn (gen_rtx_SET (VOIDmode
, out
,
23134 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
23140 rtx end_2_label
= gen_label_rtx ();
23141 /* Is zero in the first two bytes? */
23143 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23144 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23145 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
23146 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
23147 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
23149 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
23150 JUMP_LABEL (tmp
) = end_2_label
;
23152 /* Not in the first two. Move two bytes forward. */
23153 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
23154 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
23156 emit_label (end_2_label
);
23160 /* Avoid branch in fixing the byte. */
23161 tmpreg
= gen_lowpart (QImode
, tmpreg
);
23162 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
23163 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
23164 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
23165 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
23167 emit_label (end_0_label
);
23170 /* Expand strlen. */
23173 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
23175 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
23177 /* The generic case of strlen expander is long. Avoid it's
23178 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
23180 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23181 && !TARGET_INLINE_ALL_STRINGOPS
23182 && !optimize_insn_for_size_p ()
23183 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
23186 addr
= force_reg (Pmode
, XEXP (src
, 0));
23187 scratch1
= gen_reg_rtx (Pmode
);
23189 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23190 && !optimize_insn_for_size_p ())
23192 /* Well it seems that some optimizer does not combine a call like
23193 foo(strlen(bar), strlen(bar));
23194 when the move and the subtraction is done here. It does calculate
23195 the length just once when these instructions are done inside of
23196 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
23197 often used and I use one fewer register for the lifetime of
23198 output_strlen_unroll() this is better. */
23200 emit_move_insn (out
, addr
);
23202 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
23204 /* strlensi_unroll_1 returns the address of the zero at the end of
23205 the string, like memchr(), so compute the length by subtracting
23206 the start address. */
23207 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
23213 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23214 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
23217 scratch2
= gen_reg_rtx (Pmode
);
23218 scratch3
= gen_reg_rtx (Pmode
);
23219 scratch4
= force_reg (Pmode
, constm1_rtx
);
23221 emit_move_insn (scratch3
, addr
);
23222 eoschar
= force_reg (QImode
, eoschar
);
23224 src
= replace_equiv_address_nv (src
, scratch3
);
23226 /* If .md starts supporting :P, this can be done in .md. */
23227 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
23228 scratch4
), UNSPEC_SCAS
);
23229 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
23230 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
23231 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
/* For given symbol (function) construct code to compute address of its PLT
   entry in large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp, unspec;

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC);
  gcc_assert (Pmode == DImode);

  tmp = gen_reg_rtx (Pmode);
  unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
23256 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
23258 rtx pop
, bool sibcall
)
23260 /* We need to represent that SI and DI registers are clobbered
23262 static int clobbered_registers
[] = {
23263 XMM6_REG
, XMM7_REG
, XMM8_REG
,
23264 XMM9_REG
, XMM10_REG
, XMM11_REG
,
23265 XMM12_REG
, XMM13_REG
, XMM14_REG
,
23266 XMM15_REG
, SI_REG
, DI_REG
23268 rtx vec
[ARRAY_SIZE (clobbered_registers
) + 3];
23269 rtx use
= NULL
, call
;
23270 unsigned int vec_len
;
23272 if (pop
== const0_rtx
)
23274 gcc_assert (!TARGET_64BIT
|| !pop
);
23276 if (TARGET_MACHO
&& !TARGET_64BIT
)
23279 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
23280 fnaddr
= machopic_indirect_call_target (fnaddr
);
23285 /* Static functions and indirect calls don't need the pic register. */
23286 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
23287 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23288 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
23289 use_reg (&use
, pic_offset_table_rtx
);
23292 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
23294 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
23295 emit_move_insn (al
, callarg2
);
23296 use_reg (&use
, al
);
23299 if (ix86_cmodel
== CM_LARGE_PIC
23301 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23302 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
23303 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
23305 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), word_mode
)
23306 : !call_insn_operand (XEXP (fnaddr
, 0), word_mode
))
23308 fnaddr
= XEXP (fnaddr
, 0);
23309 if (GET_MODE (fnaddr
) != word_mode
)
23310 fnaddr
= convert_to_mode (word_mode
, fnaddr
, 1);
23311 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (word_mode
, fnaddr
));
23315 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
23317 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
23318 vec
[vec_len
++] = call
;
23322 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
23323 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
23324 vec
[vec_len
++] = pop
;
23327 if (TARGET_64BIT_MS_ABI
23328 && (!callarg2
|| INTVAL (callarg2
) != -2))
23332 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
23333 UNSPEC_MS_TO_SYSV_CALL
);
23335 for (i
= 0; i
< ARRAY_SIZE (clobbered_registers
); i
++)
23337 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers
[i
])
23339 gen_rtx_REG (SSE_REGNO_P (clobbered_registers
[i
])
23341 clobbered_registers
[i
]));
23344 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
23345 if (TARGET_VZEROUPPER
)
23348 if (cfun
->machine
->callee_pass_avx256_p
)
23350 if (cfun
->machine
->callee_return_avx256_p
)
23351 avx256
= callee_return_pass_avx256
;
23353 avx256
= callee_pass_avx256
;
23355 else if (cfun
->machine
->callee_return_avx256_p
)
23356 avx256
= callee_return_avx256
;
23358 avx256
= call_no_avx256
;
23360 if (reload_completed
)
23361 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256
)));
23363 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
,
23364 gen_rtvec (1, GEN_INT (avx256
)),
23365 UNSPEC_CALL_NEEDS_VZEROUPPER
);
23369 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
23370 call
= emit_call_insn (call
);
23372 CALL_INSN_FUNCTION_USAGE (call
) = use
;
void
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
{
  rtx pat = PATTERN (insn);
  rtvec vec = XVEC (pat, 0);
  int len = GET_NUM_ELEM (vec) - 1;

  /* Strip off the last entry of the parallel.  */
  gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
  gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
  if (len == 1)
    pat = RTVEC_ELT (vec, 0);
  else
    pat = gen_rtx_PARALLEL (VOIDmode,
                            gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));

  emit_insn (gen_avx_vzeroupper (vzeroupper));
  emit_call_insn (pat);
}
23396 /* Output the assembly for a call instruction. */
23399 ix86_output_call_insn (rtx insn
, rtx call_op
)
23401 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
23402 bool seh_nop_p
= false;
23405 if (SIBLING_CALL_P (insn
))
23409 /* SEH epilogue detection requires the indirect branch case
23410 to include REX.W. */
23411 else if (TARGET_SEH
)
23412 xasm
= "rex.W jmp %A0";
23416 output_asm_insn (xasm
, &call_op
);
23420 /* SEH unwinding can require an extra nop to be emitted in several
23421 circumstances. Determine if we have one of those. */
23426 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
23428 /* If we get to another real insn, we don't need the nop. */
23432 /* If we get to the epilogue note, prevent a catch region from
23433 being adjacent to the standard epilogue sequence. If non-
23434 call-exceptions, we'll have done this during epilogue emission. */
23435 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
23436 && !flag_non_call_exceptions
23437 && !can_throw_internal (insn
))
23444 /* If we didn't find a real insn following the call, prevent the
23445 unwinder from looking into the next function. */
23451 xasm
= "call\t%P0";
23453 xasm
= "call\t%A0";
23455 output_asm_insn (xasm
, &call_op
);
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared_machine_function ();
  f->use_fast_prologue_epilogue_nregs = -1;
  f->tls_descriptor_call_expanded_p = 0;
  f->call_abi = ix86_abi;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Virtual slot is valid only before vregs are instantiated.  */
  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (s->rtl);
}
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  /* Add length of addr32 prefix.  */
  len = (GET_CODE (addr) == ZERO_EXTEND
         || GET_CODE (addr) == AND);

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
         mode.  */
      if (REG_P (addr)
          && (addr == arg_pointer_rtx
              || addr == frame_pointer_rtx
              || REGNO (addr) == SP_REG
              || REGNO (addr) == BP_REG
              || REGNO (addr) == R12_REG
              || REGNO (addr) == R13_REG))
        len += 1;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (TARGET_64BIT)
        {
          rtx symbol = disp;

          if (GET_CODE (disp) == CONST)
            symbol = XEXP (disp, 0);
          if (GET_CODE (symbol) == PLUS
              && CONST_INT_P (XEXP (symbol, 1)))
            symbol = XEXP (symbol, 0);

          if (GET_CODE (symbol) != LABEL_REF
              && (GET_CODE (symbol) != SYMBOL_REF
                  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
              && (GET_CODE (symbol) != UNSPEC
                  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
                      && XINT (symbol, 1) != UNSPEC_PCREL
                      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
            len += 1;
        }
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (base && satisfies_constraint_K (disp))
            len += 1;
          else
            len += 4;
        }
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && REG_P (base)
               && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
        len += 1;

      /* An index requires the two-byte modrm form....  */
      if (index
          /* ...like esp (or r12), which always wants an index.  */
          || base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || (base && REG_P (base)
              && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
        len += 1;
    }

  return len;
}
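
/* Illustrative sketch (not part of GCC): the rules above can be restated for
   the common 32-bit case with hypothetical plain-int register numbers
   (4 = esp-like, 5 = ebp-like, -1 meaning "absent").  An esp-like base needs
   a SIB byte, an ebp-like base needs at least a disp8, any index needs a SIB
   byte, and an absolute address needs a 4-byte displacement.  */

static int
example_address_extra_bytes (int base_reg, int index_reg, int has_disp,
                             int disp_fits_8bit)
{
  int len = 0;

  if (base_reg >= 0 && index_reg < 0 && !has_disp)
    {
      /* Register indirect: esp-like bases need a SIB byte, ebp-like
         bases need a disp8.  */
      if (base_reg == 4 || base_reg == 5)
        len += 1;
    }
  else if (has_disp && base_reg < 0 && index_reg < 0)
    /* Absolute addressing always uses a 4-byte displacement.  */
    len += 4;
  else
    {
      if (has_disp)
        len += (disp_fits_8bit && base_reg >= 0) ? 1 : 4;
      else if (base_reg == 5)
        len += 1;              /* ebp-like bases still need a disp8.  */

      if (index_reg >= 0 || base_reg == 4)
        len += 1;              /* SIB byte.  */
    }

  return len;
}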
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
  int len = 0;
  int i;

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        enum attr_mode mode = get_attr_mode (insn);

        if (shortform && CONST_INT_P (recog_data.operand[i]))
          {
            HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
            switch (mode)
              {
              case MODE_QI:
                len = 1;
                continue;
              case MODE_HI:
                ival = trunc_int_for_mode (ival, HImode);
                break;
              case MODE_SI:
                ival = trunc_int_for_mode (ival, SImode);
                break;
              default:
                break;
              }
            if (IN_RANGE (ival, -128, 127))
              {
                len = 1;
                continue;
              }
          }
        switch (mode)
          {
          case MODE_QI:
            len = 1;
            break;
          case MODE_HI:
            len = 2;
            break;
          case MODE_SI:
            len = 4;
            break;
          /* Immediates for DImode instructions are encoded
             as 32bit sign extended values.  */
          case MODE_DI:
            len = 4;
            break;
          default:
            fatal_insn ("unknown insn mode", insn);
          }
      }
  return len;
}
/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
        set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);
      if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
        {
          if (GET_CODE (addr) == ZERO_EXTEND)
            addr = XEXP (addr, 0);
          if (GET_CODE (addr) == SUBREG)
            addr = SUBREG_REG (addr);
        }

      return memory_address_length (addr);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        constrain_operands_cached (reload_completed);
        if (which_alternative != -1)
          {
            const char *constraints = recog_data.constraints[i];
            int alt = which_alternative;

            while (*constraints == '=' || *constraints == '+')
              constraints++;
            while (alt-- > 0)
              while (*constraints++ != ',')
                ;
            /* Skip ignored operands.  */
            if (*constraints == 'X')
              continue;
          }
        return memory_address_length (XEXP (recog_data.operand[i], 0));
      }
  return 0;
}
/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
{
  int i;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
        /* REX.W bit uses 3 byte VEX prefix.  */
        if (GET_MODE (recog_data.operand[i]) == DImode
            && GENERAL_REG_P (recog_data.operand[i]))
          return 3 + 1;
      }
    else
      {
        /* REX.X or REX.B bits use 3 byte VEX prefix.  */
        if (MEM_P (recog_data.operand[i])
            && x86_extended_reg_mentioned_p (recog_data.operand[i]))
          return 3 + 1;
      }

  return 2 + 1;
}
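
/* Illustrative sketch (not part of GCC): the prefix-size decision above can
   be stated without the RTL machinery.  The 2-byte VEX form is only
   available for opcodes in the 0F map with VEX.W clear and no need for
   REX.W/REX.X/REX.B information; everything else takes the 3-byte form.
   The helper below (hypothetical boolean parameters) returns the same
   prefix-plus-opcode length.  */

static int
example_vex_prefix_len (int is_0f_map, int vex_w,
                        int has_64bit_general_reg, int mem_uses_extended_reg)
{
  /* Non-0F opcode maps and VEX.W=1 always need the 3-byte prefix.  */
  if (!is_0f_map || vex_w)
    return 3 + 1;

  /* REX.W (a DImode general register operand) or REX.X/REX.B (extended
     registers mentioned in a memory operand) also force the 3-byte form.  */
  if (has_64bit_general_reg || mem_uses_extended_reg)
    return 3 + 1;

  /* Otherwise the compact 2-byte prefix suffices.  */
  return 2 + 1;
}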
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_ATOM:
    case PROCESSOR_K6:
    case PROCESSOR_BTVER2:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_NOCONA:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static bool
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */

static bool
ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
  int i;

  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        rtx addr = XEXP (recog_data.operand[i], 0);
        return modified_in_p (addr, set_insn) != 0;
      }
  return false;
}
23878 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
23880 enum attr_type insn_type
, dep_insn_type
;
23881 enum attr_memory memory
;
23883 int dep_insn_code_number
;
23885 /* Anti and output dependencies have zero cost on all CPUs. */
23886 if (REG_NOTE_KIND (link
) != 0)
23889 dep_insn_code_number
= recog_memoized (dep_insn
);
23891 /* If we can't recognize the insns, we can't really do anything. */
23892 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
23895 insn_type
= get_attr_type (insn
);
23896 dep_insn_type
= get_attr_type (dep_insn
);
23900 case PROCESSOR_PENTIUM
:
23901 /* Address Generation Interlock adds a cycle of latency. */
23902 if (insn_type
== TYPE_LEA
)
23904 rtx addr
= PATTERN (insn
);
23906 if (GET_CODE (addr
) == PARALLEL
)
23907 addr
= XVECEXP (addr
, 0, 0);
23909 gcc_assert (GET_CODE (addr
) == SET
);
23911 addr
= SET_SRC (addr
);
23912 if (modified_in_p (addr
, dep_insn
))
23915 else if (ix86_agi_dependent (dep_insn
, insn
))
23918 /* ??? Compares pair with jump/setcc. */
23919 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
23922 /* Floating point stores require value to be ready one cycle earlier. */
23923 if (insn_type
== TYPE_FMOV
23924 && get_attr_memory (insn
) == MEMORY_STORE
23925 && !ix86_agi_dependent (dep_insn
, insn
))
23929 case PROCESSOR_PENTIUMPRO
:
23930 memory
= get_attr_memory (insn
);
23932 /* INT->FP conversion is expensive. */
23933 if (get_attr_fp_int_src (dep_insn
))
23936 /* There is one cycle extra latency between an FP op and a store. */
23937 if (insn_type
== TYPE_FMOV
23938 && (set
= single_set (dep_insn
)) != NULL_RTX
23939 && (set2
= single_set (insn
)) != NULL_RTX
23940 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
23941 && MEM_P (SET_DEST (set2
)))
23944 /* Show ability of reorder buffer to hide latency of load by executing
23945 in parallel with previous instruction in case
23946 previous instruction is not needed to compute the address. */
23947 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
23948 && !ix86_agi_dependent (dep_insn
, insn
))
23950 /* Claim moves to take one cycle, as core can issue one load
23951 at time and the next load can start cycle later. */
23952 if (dep_insn_type
== TYPE_IMOV
23953 || dep_insn_type
== TYPE_FMOV
)
23961 memory
= get_attr_memory (insn
);
23963 /* The esp dependency is resolved before the instruction is really
23965 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
23966 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
23969 /* INT->FP conversion is expensive. */
23970 if (get_attr_fp_int_src (dep_insn
))
23973 /* Show ability of reorder buffer to hide latency of load by executing
23974 in parallel with previous instruction in case
23975 previous instruction is not needed to compute the address. */
23976 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
23977 && !ix86_agi_dependent (dep_insn
, insn
))
23979 /* Claim moves to take one cycle, as core can issue one load
23980 at time and the next load can start cycle later. */
23981 if (dep_insn_type
== TYPE_IMOV
23982 || dep_insn_type
== TYPE_FMOV
)
23991 case PROCESSOR_ATHLON
:
23993 case PROCESSOR_AMDFAM10
:
23994 case PROCESSOR_BDVER1
:
23995 case PROCESSOR_BDVER2
:
23996 case PROCESSOR_BTVER1
:
23997 case PROCESSOR_BTVER2
:
23998 case PROCESSOR_ATOM
:
23999 case PROCESSOR_GENERIC32
:
24000 case PROCESSOR_GENERIC64
:
24001 memory
= get_attr_memory (insn
);
24003 /* Show ability of reorder buffer to hide latency of load by executing
24004 in parallel with previous instruction in case
24005 previous instruction is not needed to compute the address. */
24006 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24007 && !ix86_agi_dependent (dep_insn
, insn
))
24009 enum attr_unit unit
= get_attr_unit (insn
);
24012 /* Because of the difference between the length of integer and
24013 floating unit pipeline preparation stages, the memory operands
24014 for floating point are cheaper.
24016 ??? For Athlon it the difference is most probably 2. */
24017 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
24020 loadcost
= TARGET_ATHLON
? 2 : 0;
24022 if (cost
>= loadcost
)
24035 /* How many alternative schedules to try. This should be as wide as the
24036 scheduling freedom in the DFA, but no wider. Making this value too
24037 large results extra work for the scheduler. */
24040 ia32_multipass_dfa_lookahead (void)
24044 case PROCESSOR_PENTIUM
:
24047 case PROCESSOR_PENTIUMPRO
:
24051 case PROCESSOR_CORE2_32
:
24052 case PROCESSOR_CORE2_64
:
24053 case PROCESSOR_COREI7_32
:
24054 case PROCESSOR_COREI7_64
:
24055 /* Generally, we want haifa-sched:max_issue() to look ahead as far
24056 as many instructions can be executed on a cycle, i.e.,
24057 issue_rate. I wonder why tuning for many CPUs does not do this. */
24058 return ix86_issue_rate ();
24065 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
24066 execution. It is applied if
24067 (1) IMUL instruction is on the top of list;
24068 (2) There exists the only producer of independent IMUL instruction in
24070 (3) Put found producer on the top of ready list.
24071 Returns issue rate. */
24074 ix86_sched_reorder(FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
24075 int clock_var ATTRIBUTE_UNUSED
)
24077 static int issue_rate
= -1;
24078 int n_ready
= *pn_ready
;
24079 rtx insn
, insn1
, insn2
;
24081 sd_iterator_def sd_it
;
24085 /* Set up issue rate. */
24086 issue_rate
= ix86_issue_rate();
24088 /* Do reodering for Atom only. */
24089 if (ix86_tune
!= PROCESSOR_ATOM
)
24091 /* Nothing to do if ready list contains only 1 instruction. */
24095 /* Check that IMUL instruction is on the top of ready list. */
24096 insn
= ready
[n_ready
- 1];
24097 if (!NONDEBUG_INSN_P (insn
))
24099 insn
= PATTERN (insn
);
24100 if (GET_CODE (insn
) == PARALLEL
)
24101 insn
= XVECEXP (insn
, 0, 0);
24102 if (GET_CODE (insn
) != SET
)
24104 if (!(GET_CODE (SET_SRC (insn
)) == MULT
24105 && GET_MODE (SET_SRC (insn
)) == SImode
))
24108 /* Search for producer of independent IMUL instruction. */
24109 for (i
= n_ready
- 2; i
>= 0; i
--)
24112 if (!NONDEBUG_INSN_P (insn
))
24114 /* Skip IMUL instruction. */
24115 insn2
= PATTERN (insn
);
24116 if (GET_CODE (insn2
) == PARALLEL
)
24117 insn2
= XVECEXP (insn2
, 0, 0);
24118 if (GET_CODE (insn2
) == SET
24119 && GET_CODE (SET_SRC (insn2
)) == MULT
24120 && GET_MODE (SET_SRC (insn2
)) == SImode
)
24123 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
24126 con
= DEP_CON (dep
);
24127 if (!NONDEBUG_INSN_P (con
))
24129 insn1
= PATTERN (con
);
24130 if (GET_CODE (insn1
) == PARALLEL
)
24131 insn1
= XVECEXP (insn1
, 0, 0);
24133 if (GET_CODE (insn1
) == SET
24134 && GET_CODE (SET_SRC (insn1
)) == MULT
24135 && GET_MODE (SET_SRC (insn1
)) == SImode
)
24137 sd_iterator_def sd_it1
;
24139 /* Check if there is no other dependee for IMUL. */
24141 FOR_EACH_DEP (con
, SD_LIST_BACK
, sd_it1
, dep1
)
24144 pro
= DEP_PRO (dep1
);
24145 if (!NONDEBUG_INSN_P (pro
))
24158 return issue_rate
; /* Didn't find IMUL producer. */
24160 if (sched_verbose
> 1)
24161 fprintf(dump
, ";;\tatom sched_reorder: swap %d and %d insns\n",
24162 INSN_UID (ready
[index
]), INSN_UID (ready
[n_ready
- 1]));
24164 /* Put IMUL producer (ready[index]) at the top of ready list. */
24165 insn1
= ready
[index
];
24166 for (i
= index
; i
< n_ready
- 1; i
++)
24167 ready
[i
] = ready
[i
+ 1];
24168 ready
[n_ready
- 1] = insn1
;
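
/* Illustrative sketch (not part of GCC): the reordering above amounts to
   rotating one element of the ready array into the top slot
   (ready[n_ready - 1] is the insn issued next).  The stand-alone helper
   below shows the same rotation on a plain int array.  */

static void
example_move_to_top (int *ready, int n_ready, int index)
{
  int chosen = ready[index];
  int i;

  /* Slide the elements above INDEX down by one ...  */
  for (i = index; i < n_ready - 1; i++)
    ready[i] = ready[i + 1];

  /* ... and put the chosen element on top.  */
  ready[n_ready - 1] = chosen;
}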
/* Model decoder of Core 2/i7.
   Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */

/* Maximum length of an insn that can be handled by
   a secondary decoder unit.  '8' for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
   '16' for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Maximum number of instructions decoder can handle per cycle.
   '6' for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;
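
/* Illustrative sketch (not part of GCC): with the parameters above
   (secondary decoder limit of 8 bytes, 16-byte fetch block, at most 6 insns
   decoded per cycle) the "can this insn still be issued this cycle?" test
   used by the hooks below reduces to the stand-alone predicate here.  */

static int
example_insn_fits_fetch_block (int insn_size, int is_first_insn_of_cycle,
                               int block_len_so_far, int block_n_insns)
{
  /* Long insns can only be handled by the first (complex) decoder.  */
  if (!is_first_insn_of_cycle && insn_size > 8)
    return 0;

  /* The insn must fit in the 16-byte instruction fetch block ...  */
  if (block_len_so_far + insn_size > 16)
    return 0;

  /* ... and the decoders handle at most 6 insns per cycle.  */
  if (block_n_insns + 1 > 6)
    return 0;

  return 1;
}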
24192 typedef struct ix86_first_cycle_multipass_data_
*
24193 ix86_first_cycle_multipass_data_t
;
24194 typedef const struct ix86_first_cycle_multipass_data_
*
24195 const_ix86_first_cycle_multipass_data_t
;
24197 /* A variable to store target state across calls to max_issue within
24199 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
24200 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
24202 /* Initialize DATA. */
24204 core2i7_first_cycle_multipass_init (void *_data
)
24206 ix86_first_cycle_multipass_data_t data
24207 = (ix86_first_cycle_multipass_data_t
) _data
;
24209 data
->ifetch_block_len
= 0;
24210 data
->ifetch_block_n_insns
= 0;
24211 data
->ready_try_change
= NULL
;
24212 data
->ready_try_change_size
= 0;
24215 /* Advancing the cycle; reset ifetch block counts. */
24217 core2i7_dfa_post_advance_cycle (void)
24219 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
24221 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24223 data
->ifetch_block_len
= 0;
24224 data
->ifetch_block_n_insns
= 0;
24227 static int min_insn_size (rtx
);
24229 /* Filter out insns from ready_try that the core will not be able to issue
24230 on current cycle due to decoder. */
24232 core2i7_first_cycle_multipass_filter_ready_try
24233 (const_ix86_first_cycle_multipass_data_t data
,
24234 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
24241 if (ready_try
[n_ready
])
24244 insn
= get_ready_element (n_ready
);
24245 insn_size
= min_insn_size (insn
);
24247 if (/* If this is a too long an insn for a secondary decoder ... */
24248 (!first_cycle_insn_p
24249 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
24250 /* ... or it would not fit into the ifetch block ... */
24251 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
24252 /* ... or the decoder is full already ... */
24253 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
24254 /* ... mask the insn out. */
24256 ready_try
[n_ready
] = 1;
24258 if (data
->ready_try_change
)
24259 SET_BIT (data
->ready_try_change
, n_ready
);
24264 /* Prepare for a new round of multipass lookahead scheduling. */
24266 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
24267 bool first_cycle_insn_p
)
24269 ix86_first_cycle_multipass_data_t data
24270 = (ix86_first_cycle_multipass_data_t
) _data
;
24271 const_ix86_first_cycle_multipass_data_t prev_data
24272 = ix86_first_cycle_multipass_data
;
24274 /* Restore the state from the end of the previous round. */
24275 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
24276 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
24278 /* Filter instructions that cannot be issued on current cycle due to
24279 decoder restrictions. */
24280 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24281 first_cycle_insn_p
);
24284 /* INSN is being issued in current solution. Account for its impact on
24285 the decoder model. */
24287 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
24288 rtx insn
, const void *_prev_data
)
24290 ix86_first_cycle_multipass_data_t data
24291 = (ix86_first_cycle_multipass_data_t
) _data
;
24292 const_ix86_first_cycle_multipass_data_t prev_data
24293 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
24295 int insn_size
= min_insn_size (insn
);
24297 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
24298 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
24299 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
24300 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24302 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
24303 if (!data
->ready_try_change
)
24305 data
->ready_try_change
= sbitmap_alloc (n_ready
);
24306 data
->ready_try_change_size
= n_ready
;
24308 else if (data
->ready_try_change_size
< n_ready
)
24310 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
24312 data
->ready_try_change_size
= n_ready
;
24314 sbitmap_zero (data
->ready_try_change
);
24316 /* Filter out insns from ready_try that the core will not be able to issue
24317 on current cycle due to decoder. */
24318 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24322 /* Revert the effect on ready_try. */
24324 core2i7_first_cycle_multipass_backtrack (const void *_data
,
24326 int n_ready ATTRIBUTE_UNUSED
)
24328 const_ix86_first_cycle_multipass_data_t data
24329 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24330 unsigned int i
= 0;
24331 sbitmap_iterator sbi
;
24333 gcc_assert (sbitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
24334 EXECUTE_IF_SET_IN_SBITMAP (data
->ready_try_change
, 0, i
, sbi
)
24340 /* Save the result of multipass lookahead scheduling for the next round. */
24342 core2i7_first_cycle_multipass_end (const void *_data
)
24344 const_ix86_first_cycle_multipass_data_t data
24345 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24346 ix86_first_cycle_multipass_data_t next_data
24347 = ix86_first_cycle_multipass_data
;
24351 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
24352 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
24356 /* Deallocate target data. */
24358 core2i7_first_cycle_multipass_fini (void *_data
)
24360 ix86_first_cycle_multipass_data_t data
24361 = (ix86_first_cycle_multipass_data_t
) _data
;
24363 if (data
->ready_try_change
)
24365 sbitmap_free (data
->ready_try_change
);
24366 data
->ready_try_change
= NULL
;
24367 data
->ready_try_change_size
= 0;
24371 /* Prepare for scheduling pass. */
24373 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
24374 int verbose ATTRIBUTE_UNUSED
,
24375 int max_uid ATTRIBUTE_UNUSED
)
24377 /* Install scheduling hooks for current CPU. Some of these hooks are used
24378 in time-critical parts of the scheduler, so we only set them up when
24379 they are actually used. */
24382 case PROCESSOR_CORE2_32
:
24383 case PROCESSOR_CORE2_64
:
24384 case PROCESSOR_COREI7_32
:
24385 case PROCESSOR_COREI7_64
:
24386 targetm
.sched
.dfa_post_advance_cycle
24387 = core2i7_dfa_post_advance_cycle
;
24388 targetm
.sched
.first_cycle_multipass_init
24389 = core2i7_first_cycle_multipass_init
;
24390 targetm
.sched
.first_cycle_multipass_begin
24391 = core2i7_first_cycle_multipass_begin
;
24392 targetm
.sched
.first_cycle_multipass_issue
24393 = core2i7_first_cycle_multipass_issue
;
24394 targetm
.sched
.first_cycle_multipass_backtrack
24395 = core2i7_first_cycle_multipass_backtrack
;
24396 targetm
.sched
.first_cycle_multipass_end
24397 = core2i7_first_cycle_multipass_end
;
24398 targetm
.sched
.first_cycle_multipass_fini
24399 = core2i7_first_cycle_multipass_fini
;
24401 /* Set decoder parameters. */
24402 core2i7_secondary_decoder_max_insn_size
= 8;
24403 core2i7_ifetch_block_size
= 16;
24404 core2i7_ifetch_block_max_insns
= 6;
24408 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
24409 targetm
.sched
.first_cycle_multipass_init
= NULL
;
24410 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
24411 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
24412 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
24413 targetm
.sched
.first_cycle_multipass_end
= NULL
;
24414 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
24420 /* Compute the alignment given to a constant that is being placed in memory.
24421 EXP is the constant and ALIGN is the alignment that the object would
24423 The value of this function is used instead of that alignment to align
24427 ix86_constant_alignment (tree exp
, int align
)
24429 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
24430 || TREE_CODE (exp
) == INTEGER_CST
)
24432 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
24434 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
24437 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
24438 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
24439 return BITS_PER_WORD
;
24444 /* Compute the alignment for a static variable.
24445 TYPE is the data type, and ALIGN is the alignment that
24446 the object would ordinarily have. The value of this function is used
24447 instead of that alignment to align the object. */
24450 ix86_data_alignment (tree type
, int align
)
24452 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
24454 if (AGGREGATE_TYPE_P (type
)
24455 && TYPE_SIZE (type
)
24456 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24457 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
24458 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
24459 && align
< max_align
)
24462 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24463 to 16byte boundary. */
24466 if (AGGREGATE_TYPE_P (type
)
24467 && TYPE_SIZE (type
)
24468 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24469 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
24470 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
24474 if (TREE_CODE (type
) == ARRAY_TYPE
)
24476 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
24478 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
24481 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
24484 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
24486 if ((TYPE_MODE (type
) == XCmode
24487 || TYPE_MODE (type
) == TCmode
) && align
< 128)
24490 else if ((TREE_CODE (type
) == RECORD_TYPE
24491 || TREE_CODE (type
) == UNION_TYPE
24492 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
24493 && TYPE_FIELDS (type
))
24495 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
24497 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
24500 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
24501 || TREE_CODE (type
) == INTEGER_TYPE
)
24503 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
24505 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
24512 /* Compute the alignment for a local variable or a stack slot. EXP is
24513 the data type or decl itself, MODE is the widest mode available and
24514 ALIGN is the alignment that the object would ordinarily have. The
24515 value of this macro is used instead of that alignment to align the
24519 ix86_local_alignment (tree exp
, enum machine_mode mode
,
24520 unsigned int align
)
24524 if (exp
&& DECL_P (exp
))
24526 type
= TREE_TYPE (exp
);
24535 /* Don't do dynamic stack realignment for long long objects with
24536 -mpreferred-stack-boundary=2. */
24539 && ix86_preferred_stack_boundary
< 64
24540 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
24541 && (!type
|| !TYPE_USER_ALIGN (type
))
24542 && (!decl
|| !DECL_USER_ALIGN (decl
)))
24545 /* If TYPE is NULL, we are allocating a stack slot for caller-save
24546 register in MODE. We will return the largest alignment of XF
24550 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
24551 align
= GET_MODE_ALIGNMENT (DFmode
);
24555 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24556 to 16byte boundary. Exact wording is:
24558 An array uses the same alignment as its elements, except that a local or
24559 global array variable of length at least 16 bytes or
24560 a C99 variable-length array variable always has alignment of at least 16 bytes.
24562 This was added to allow use of aligned SSE instructions at arrays. This
24563 rule is meant for static storage (where compiler can not do the analysis
24564 by itself). We follow it for automatic variables only when convenient.
24565 We fully control everything in the function compiled and functions from
24566 other unit can not rely on the alignment.
24568 Exclude va_list type. It is the common case of local array where
24569 we can not benefit from the alignment. */
24570 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
24573 if (AGGREGATE_TYPE_P (type
)
24574 && (va_list_type_node
== NULL_TREE
24575 || (TYPE_MAIN_VARIANT (type
)
24576 != TYPE_MAIN_VARIANT (va_list_type_node
)))
24577 && TYPE_SIZE (type
)
24578 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24579 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
24580 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
24583 if (TREE_CODE (type
) == ARRAY_TYPE
)
24585 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
24587 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
24590 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
24592 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
24594 if ((TYPE_MODE (type
) == XCmode
24595 || TYPE_MODE (type
) == TCmode
) && align
< 128)
24598 else if ((TREE_CODE (type
) == RECORD_TYPE
24599 || TREE_CODE (type
) == UNION_TYPE
24600 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
24601 && TYPE_FIELDS (type
))
24603 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
24605 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
24608 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
24609 || TREE_CODE (type
) == INTEGER_TYPE
)
24612 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
24614 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
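
/* Illustrative sketch (not part of GCC): the ABI rule quoted above - a local
   or global array of at least 16 bytes gets at least 16-byte alignment so
   that aligned SSE accesses are possible - can be written as this small
   stand-alone helper (size in bytes, alignments in bits).  */

static unsigned int
example_array_alignment (unsigned long size_in_bytes,
                         unsigned int natural_align_in_bits)
{
  if (size_in_bytes >= 16 && natural_align_in_bits < 128)
    return 128;
  return natural_align_in_bits;
}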
24620 /* Compute the minimum required alignment for dynamic stack realignment
24621 purposes for a local variable, parameter or a stack slot. EXP is
24622 the data type or decl itself, MODE is its mode and ALIGN is the
24623 alignment that the object would ordinarily have. */
24626 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
24627 unsigned int align
)
24631 if (exp
&& DECL_P (exp
))
24633 type
= TREE_TYPE (exp
);
24642 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
24645 /* Don't do dynamic stack realignment for long long objects with
24646 -mpreferred-stack-boundary=2. */
24647 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
24648 && (!type
|| !TYPE_USER_ALIGN (type
))
24649 && (!decl
|| !DECL_USER_ALIGN (decl
)))
24655 /* Find a location for the static chain incoming to a nested function.
24656 This is a register, unless all free registers are used by arguments. */
24659 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
24663 if (!DECL_STATIC_CHAIN (fndecl
))
24668 /* We always use R10 in 64-bit mode. */
24676 /* By default in 32-bit mode we use ECX to pass the static chain. */
24679 fntype
= TREE_TYPE (fndecl
);
24680 ccvt
= ix86_get_callcvt (fntype
);
24681 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) != 0)
24683 /* Fastcall functions use ecx/edx for arguments, which leaves
24684 us with EAX for the static chain.
24685 Thiscall functions use ecx for arguments, which also
24686 leaves us with EAX for the static chain. */
24689 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
24691 /* For regparm 3, we have no free call-clobbered registers in
24692 which to store the static chain. In order to implement this,
24693 we have the trampoline push the static chain to the stack.
24694 However, we can't push a value below the return address when
24695 we call the nested function directly, so we have to use an
24696 alternate entry point. For this we use ESI, and have the
24697 alternate entry point push ESI, so that things appear the
24698 same once we're executing the nested function. */
24701 if (fndecl
== current_function_decl
)
24702 ix86_static_chain_on_stack
= true;
24703 return gen_frame_mem (SImode
,
24704 plus_constant (Pmode
,
24705 arg_pointer_rtx
, -8));
24711 return gen_rtx_REG (Pmode
, regno
);
24714 /* Emit RTL insns to initialize the variable parts of a trampoline.
24715 FNDECL is the decl of the target address; M_TRAMP is a MEM for
24716 the trampoline, and CHAIN_VALUE is an RTX for the static chain
24717 to be passed to the target function. */
24720 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
24726 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
24732 /* Load the function address to r11. Try to load address using
24733 the shorter movl instead of movabs. We may want to support
24734 movq for kernel mode, but kernel does not use trampolines at
24735 the moment. FNADDR is a 32bit address and may not be in
24736 DImode when ptr_mode == SImode. Always use movl in this
24738 if (ptr_mode
== SImode
24739 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
24741 fnaddr
= copy_addr_to_reg (fnaddr
);
24743 mem
= adjust_address (m_tramp
, HImode
, offset
);
24744 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
24746 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
24747 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
24752 mem
= adjust_address (m_tramp
, HImode
, offset
);
24753 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
24755 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
24756 emit_move_insn (mem
, fnaddr
);
24760 /* Load static chain using movabs to r10. Use the shorter movl
24761 instead of movabs when ptr_mode == SImode. */
24762 if (ptr_mode
== SImode
)
24773 mem
= adjust_address (m_tramp
, HImode
, offset
);
24774 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
24776 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
24777 emit_move_insn (mem
, chain_value
);
24780 /* Jump to r11; the last (unused) byte is a nop, only there to
24781 pad the write out to a single 32-bit store. */
24782 mem
= adjust_address (m_tramp
, SImode
, offset
);
24783 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
24790 /* Depending on the static chain location, either load a register
24791 with a constant, or push the constant to the stack. All of the
24792 instructions are the same size. */
24793 chain
= ix86_static_chain (fndecl
, true);
24796 switch (REGNO (chain
))
24799 opcode
= 0xb8; break;
24801 opcode
= 0xb9; break;
24803 gcc_unreachable ();
24809 mem
= adjust_address (m_tramp
, QImode
, offset
);
24810 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
24812 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
24813 emit_move_insn (mem
, chain_value
);
24816 mem
= adjust_address (m_tramp
, QImode
, offset
);
24817 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
24819 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
24821 /* Compute offset from the end of the jmp to the target function.
24822 In the case in which the trampoline stores the static chain on
24823 the stack, we need to skip the first insn which pushes the
24824 (call-saved) register static chain; this push is 1 byte. */
24826 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
24827 plus_constant (Pmode
, XEXP (m_tramp
, 0),
24828 offset
- (MEM_P (chain
) ? 1 : 0)),
24829 NULL_RTX
, 1, OPTAB_DIRECT
);
24830 emit_move_insn (mem
, disp
);
24833 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
24835 #ifdef HAVE_ENABLE_EXECUTE_STACK
24836 #ifdef CHECK_EXECUTE_STACK_ENABLED
24837 if (CHECK_EXECUTE_STACK_ENABLED
)
24839 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
24840 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
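
/* Illustrative sketch (not part of GCC): in the 64-bit movabs case the
   trampoline written above is the byte sequence "movabs $fnaddr, %r11;
   movabs $chain, %r10; rex jmp *%r11; nop".  The hypothetical helper below
   lays that sequence out in a plain byte buffer to make the encoding and
   offsets explicit (24 bytes in total).  */

static int
example_fill_tramp64 (unsigned char *buf,
                      unsigned long long fnaddr, unsigned long long chain)
{
  int i;

  buf[0] = 0x49; buf[1] = 0xbb;         /* movabs $fnaddr, %r11 */
  for (i = 0; i < 8; i++)
    buf[2 + i] = (unsigned char) (fnaddr >> (8 * i));

  buf[10] = 0x49; buf[11] = 0xba;       /* movabs $chain, %r10 */
  for (i = 0; i < 8; i++)
    buf[12 + i] = (unsigned char) (chain >> (8 * i));

  buf[20] = 0x49; buf[21] = 0xff;       /* jmp *%r11 (with REX prefix) */
  buf[22] = 0xe3;
  buf[23] = 0x90;                       /* nop pads the final 4-byte store */

  return 24;
}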
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
        quals = TYPE_UNQUALIFIED;
      else
        quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
        itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;

  return type;
}
/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
        {
          atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
          args = tree_cons (NULL, atype, args);
        }

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;

  return type;
}
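
/* Illustrative sketch (not part of GCC): both tables above follow the same
   memoization pattern - look the code up, and if the entry is still empty,
   build it once and cache it.  The stand-alone helper below (with a
   hypothetical builder callback) shows that shape.  */

static void *
example_get_cached (void **table, unsigned int code,
                    void *(*build) (unsigned int code))
{
  if (table[code] == 0)
    table[code] = build (code);         /* built lazily on first use */
  return table[code];
}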
24946 /* Codes for all the SSE/MMX builtins. */
24949 IX86_BUILTIN_ADDPS
,
24950 IX86_BUILTIN_ADDSS
,
24951 IX86_BUILTIN_DIVPS
,
24952 IX86_BUILTIN_DIVSS
,
24953 IX86_BUILTIN_MULPS
,
24954 IX86_BUILTIN_MULSS
,
24955 IX86_BUILTIN_SUBPS
,
24956 IX86_BUILTIN_SUBSS
,
24958 IX86_BUILTIN_CMPEQPS
,
24959 IX86_BUILTIN_CMPLTPS
,
24960 IX86_BUILTIN_CMPLEPS
,
24961 IX86_BUILTIN_CMPGTPS
,
24962 IX86_BUILTIN_CMPGEPS
,
24963 IX86_BUILTIN_CMPNEQPS
,
24964 IX86_BUILTIN_CMPNLTPS
,
24965 IX86_BUILTIN_CMPNLEPS
,
24966 IX86_BUILTIN_CMPNGTPS
,
24967 IX86_BUILTIN_CMPNGEPS
,
24968 IX86_BUILTIN_CMPORDPS
,
24969 IX86_BUILTIN_CMPUNORDPS
,
24970 IX86_BUILTIN_CMPEQSS
,
24971 IX86_BUILTIN_CMPLTSS
,
24972 IX86_BUILTIN_CMPLESS
,
24973 IX86_BUILTIN_CMPNEQSS
,
24974 IX86_BUILTIN_CMPNLTSS
,
24975 IX86_BUILTIN_CMPNLESS
,
24976 IX86_BUILTIN_CMPNGTSS
,
24977 IX86_BUILTIN_CMPNGESS
,
24978 IX86_BUILTIN_CMPORDSS
,
24979 IX86_BUILTIN_CMPUNORDSS
,
24981 IX86_BUILTIN_COMIEQSS
,
24982 IX86_BUILTIN_COMILTSS
,
24983 IX86_BUILTIN_COMILESS
,
24984 IX86_BUILTIN_COMIGTSS
,
24985 IX86_BUILTIN_COMIGESS
,
24986 IX86_BUILTIN_COMINEQSS
,
24987 IX86_BUILTIN_UCOMIEQSS
,
24988 IX86_BUILTIN_UCOMILTSS
,
24989 IX86_BUILTIN_UCOMILESS
,
24990 IX86_BUILTIN_UCOMIGTSS
,
24991 IX86_BUILTIN_UCOMIGESS
,
24992 IX86_BUILTIN_UCOMINEQSS
,
24994 IX86_BUILTIN_CVTPI2PS
,
24995 IX86_BUILTIN_CVTPS2PI
,
24996 IX86_BUILTIN_CVTSI2SS
,
24997 IX86_BUILTIN_CVTSI642SS
,
24998 IX86_BUILTIN_CVTSS2SI
,
24999 IX86_BUILTIN_CVTSS2SI64
,
25000 IX86_BUILTIN_CVTTPS2PI
,
25001 IX86_BUILTIN_CVTTSS2SI
,
25002 IX86_BUILTIN_CVTTSS2SI64
,
25004 IX86_BUILTIN_MAXPS
,
25005 IX86_BUILTIN_MAXSS
,
25006 IX86_BUILTIN_MINPS
,
25007 IX86_BUILTIN_MINSS
,
25009 IX86_BUILTIN_LOADUPS
,
25010 IX86_BUILTIN_STOREUPS
,
25011 IX86_BUILTIN_MOVSS
,
25013 IX86_BUILTIN_MOVHLPS
,
25014 IX86_BUILTIN_MOVLHPS
,
25015 IX86_BUILTIN_LOADHPS
,
25016 IX86_BUILTIN_LOADLPS
,
25017 IX86_BUILTIN_STOREHPS
,
25018 IX86_BUILTIN_STORELPS
,
25020 IX86_BUILTIN_MASKMOVQ
,
25021 IX86_BUILTIN_MOVMSKPS
,
25022 IX86_BUILTIN_PMOVMSKB
,
25024 IX86_BUILTIN_MOVNTPS
,
25025 IX86_BUILTIN_MOVNTQ
,
25027 IX86_BUILTIN_LOADDQU
,
25028 IX86_BUILTIN_STOREDQU
,
25030 IX86_BUILTIN_PACKSSWB
,
25031 IX86_BUILTIN_PACKSSDW
,
25032 IX86_BUILTIN_PACKUSWB
,
25034 IX86_BUILTIN_PADDB
,
25035 IX86_BUILTIN_PADDW
,
25036 IX86_BUILTIN_PADDD
,
25037 IX86_BUILTIN_PADDQ
,
25038 IX86_BUILTIN_PADDSB
,
25039 IX86_BUILTIN_PADDSW
,
25040 IX86_BUILTIN_PADDUSB
,
25041 IX86_BUILTIN_PADDUSW
,
25042 IX86_BUILTIN_PSUBB
,
25043 IX86_BUILTIN_PSUBW
,
25044 IX86_BUILTIN_PSUBD
,
25045 IX86_BUILTIN_PSUBQ
,
25046 IX86_BUILTIN_PSUBSB
,
25047 IX86_BUILTIN_PSUBSW
,
25048 IX86_BUILTIN_PSUBUSB
,
25049 IX86_BUILTIN_PSUBUSW
,
25052 IX86_BUILTIN_PANDN
,
25056 IX86_BUILTIN_PAVGB
,
25057 IX86_BUILTIN_PAVGW
,
25059 IX86_BUILTIN_PCMPEQB
,
25060 IX86_BUILTIN_PCMPEQW
,
25061 IX86_BUILTIN_PCMPEQD
,
25062 IX86_BUILTIN_PCMPGTB
,
25063 IX86_BUILTIN_PCMPGTW
,
25064 IX86_BUILTIN_PCMPGTD
,
25066 IX86_BUILTIN_PMADDWD
,
25068 IX86_BUILTIN_PMAXSW
,
25069 IX86_BUILTIN_PMAXUB
,
25070 IX86_BUILTIN_PMINSW
,
25071 IX86_BUILTIN_PMINUB
,
25073 IX86_BUILTIN_PMULHUW
,
25074 IX86_BUILTIN_PMULHW
,
25075 IX86_BUILTIN_PMULLW
,
25077 IX86_BUILTIN_PSADBW
,
25078 IX86_BUILTIN_PSHUFW
,
25080 IX86_BUILTIN_PSLLW
,
25081 IX86_BUILTIN_PSLLD
,
25082 IX86_BUILTIN_PSLLQ
,
25083 IX86_BUILTIN_PSRAW
,
25084 IX86_BUILTIN_PSRAD
,
25085 IX86_BUILTIN_PSRLW
,
25086 IX86_BUILTIN_PSRLD
,
25087 IX86_BUILTIN_PSRLQ
,
25088 IX86_BUILTIN_PSLLWI
,
25089 IX86_BUILTIN_PSLLDI
,
25090 IX86_BUILTIN_PSLLQI
,
25091 IX86_BUILTIN_PSRAWI
,
25092 IX86_BUILTIN_PSRADI
,
25093 IX86_BUILTIN_PSRLWI
,
25094 IX86_BUILTIN_PSRLDI
,
25095 IX86_BUILTIN_PSRLQI
,
25097 IX86_BUILTIN_PUNPCKHBW
,
25098 IX86_BUILTIN_PUNPCKHWD
,
25099 IX86_BUILTIN_PUNPCKHDQ
,
25100 IX86_BUILTIN_PUNPCKLBW
,
25101 IX86_BUILTIN_PUNPCKLWD
,
25102 IX86_BUILTIN_PUNPCKLDQ
,
25104 IX86_BUILTIN_SHUFPS
,
25106 IX86_BUILTIN_RCPPS
,
25107 IX86_BUILTIN_RCPSS
,
25108 IX86_BUILTIN_RSQRTPS
,
25109 IX86_BUILTIN_RSQRTPS_NR
,
25110 IX86_BUILTIN_RSQRTSS
,
25111 IX86_BUILTIN_RSQRTF
,
25112 IX86_BUILTIN_SQRTPS
,
25113 IX86_BUILTIN_SQRTPS_NR
,
25114 IX86_BUILTIN_SQRTSS
,
25116 IX86_BUILTIN_UNPCKHPS
,
25117 IX86_BUILTIN_UNPCKLPS
,
25119 IX86_BUILTIN_ANDPS
,
25120 IX86_BUILTIN_ANDNPS
,
25122 IX86_BUILTIN_XORPS
,
25125 IX86_BUILTIN_LDMXCSR
,
25126 IX86_BUILTIN_STMXCSR
,
25127 IX86_BUILTIN_SFENCE
,
25129 /* 3DNow! Original */
25130 IX86_BUILTIN_FEMMS
,
25131 IX86_BUILTIN_PAVGUSB
,
25132 IX86_BUILTIN_PF2ID
,
25133 IX86_BUILTIN_PFACC
,
25134 IX86_BUILTIN_PFADD
,
25135 IX86_BUILTIN_PFCMPEQ
,
25136 IX86_BUILTIN_PFCMPGE
,
25137 IX86_BUILTIN_PFCMPGT
,
25138 IX86_BUILTIN_PFMAX
,
25139 IX86_BUILTIN_PFMIN
,
25140 IX86_BUILTIN_PFMUL
,
25141 IX86_BUILTIN_PFRCP
,
25142 IX86_BUILTIN_PFRCPIT1
,
25143 IX86_BUILTIN_PFRCPIT2
,
25144 IX86_BUILTIN_PFRSQIT1
,
25145 IX86_BUILTIN_PFRSQRT
,
25146 IX86_BUILTIN_PFSUB
,
25147 IX86_BUILTIN_PFSUBR
,
25148 IX86_BUILTIN_PI2FD
,
25149 IX86_BUILTIN_PMULHRW
,
25151 /* 3DNow! Athlon Extensions */
25152 IX86_BUILTIN_PF2IW
,
25153 IX86_BUILTIN_PFNACC
,
25154 IX86_BUILTIN_PFPNACC
,
25155 IX86_BUILTIN_PI2FW
,
25156 IX86_BUILTIN_PSWAPDSI
,
25157 IX86_BUILTIN_PSWAPDSF
,
25160 IX86_BUILTIN_ADDPD
,
25161 IX86_BUILTIN_ADDSD
,
25162 IX86_BUILTIN_DIVPD
,
25163 IX86_BUILTIN_DIVSD
,
25164 IX86_BUILTIN_MULPD
,
25165 IX86_BUILTIN_MULSD
,
25166 IX86_BUILTIN_SUBPD
,
25167 IX86_BUILTIN_SUBSD
,
25169 IX86_BUILTIN_CMPEQPD
,
25170 IX86_BUILTIN_CMPLTPD
,
25171 IX86_BUILTIN_CMPLEPD
,
25172 IX86_BUILTIN_CMPGTPD
,
25173 IX86_BUILTIN_CMPGEPD
,
25174 IX86_BUILTIN_CMPNEQPD
,
25175 IX86_BUILTIN_CMPNLTPD
,
25176 IX86_BUILTIN_CMPNLEPD
,
25177 IX86_BUILTIN_CMPNGTPD
,
25178 IX86_BUILTIN_CMPNGEPD
,
25179 IX86_BUILTIN_CMPORDPD
,
25180 IX86_BUILTIN_CMPUNORDPD
,
25181 IX86_BUILTIN_CMPEQSD
,
25182 IX86_BUILTIN_CMPLTSD
,
25183 IX86_BUILTIN_CMPLESD
,
25184 IX86_BUILTIN_CMPNEQSD
,
25185 IX86_BUILTIN_CMPNLTSD
,
25186 IX86_BUILTIN_CMPNLESD
,
25187 IX86_BUILTIN_CMPORDSD
,
25188 IX86_BUILTIN_CMPUNORDSD
,
25190 IX86_BUILTIN_COMIEQSD
,
25191 IX86_BUILTIN_COMILTSD
,
25192 IX86_BUILTIN_COMILESD
,
25193 IX86_BUILTIN_COMIGTSD
,
25194 IX86_BUILTIN_COMIGESD
,
25195 IX86_BUILTIN_COMINEQSD
,
25196 IX86_BUILTIN_UCOMIEQSD
,
25197 IX86_BUILTIN_UCOMILTSD
,
25198 IX86_BUILTIN_UCOMILESD
,
25199 IX86_BUILTIN_UCOMIGTSD
,
25200 IX86_BUILTIN_UCOMIGESD
,
25201 IX86_BUILTIN_UCOMINEQSD
,
25203 IX86_BUILTIN_MAXPD
,
25204 IX86_BUILTIN_MAXSD
,
25205 IX86_BUILTIN_MINPD
,
25206 IX86_BUILTIN_MINSD
,
25208 IX86_BUILTIN_ANDPD
,
25209 IX86_BUILTIN_ANDNPD
,
25211 IX86_BUILTIN_XORPD
,
25213 IX86_BUILTIN_SQRTPD
,
25214 IX86_BUILTIN_SQRTSD
,
25216 IX86_BUILTIN_UNPCKHPD
,
25217 IX86_BUILTIN_UNPCKLPD
,
25219 IX86_BUILTIN_SHUFPD
,
25221 IX86_BUILTIN_LOADUPD
,
25222 IX86_BUILTIN_STOREUPD
,
25223 IX86_BUILTIN_MOVSD
,
25225 IX86_BUILTIN_LOADHPD
,
25226 IX86_BUILTIN_LOADLPD
,
25228 IX86_BUILTIN_CVTDQ2PD
,
25229 IX86_BUILTIN_CVTDQ2PS
,
25231 IX86_BUILTIN_CVTPD2DQ
,
25232 IX86_BUILTIN_CVTPD2PI
,
25233 IX86_BUILTIN_CVTPD2PS
,
25234 IX86_BUILTIN_CVTTPD2DQ
,
25235 IX86_BUILTIN_CVTTPD2PI
,
25237 IX86_BUILTIN_CVTPI2PD
,
25238 IX86_BUILTIN_CVTSI2SD
,
25239 IX86_BUILTIN_CVTSI642SD
,
25241 IX86_BUILTIN_CVTSD2SI
,
25242 IX86_BUILTIN_CVTSD2SI64
,
25243 IX86_BUILTIN_CVTSD2SS
,
25244 IX86_BUILTIN_CVTSS2SD
,
25245 IX86_BUILTIN_CVTTSD2SI
,
25246 IX86_BUILTIN_CVTTSD2SI64
,
25248 IX86_BUILTIN_CVTPS2DQ
,
25249 IX86_BUILTIN_CVTPS2PD
,
25250 IX86_BUILTIN_CVTTPS2DQ
,
25252 IX86_BUILTIN_MOVNTI
,
25253 IX86_BUILTIN_MOVNTI64
,
25254 IX86_BUILTIN_MOVNTPD
,
25255 IX86_BUILTIN_MOVNTDQ
,
25257 IX86_BUILTIN_MOVQ128
,
25260 IX86_BUILTIN_MASKMOVDQU
,
25261 IX86_BUILTIN_MOVMSKPD
,
25262 IX86_BUILTIN_PMOVMSKB128
,
25264 IX86_BUILTIN_PACKSSWB128
,
25265 IX86_BUILTIN_PACKSSDW128
,
25266 IX86_BUILTIN_PACKUSWB128
,
25268 IX86_BUILTIN_PADDB128
,
25269 IX86_BUILTIN_PADDW128
,
25270 IX86_BUILTIN_PADDD128
,
25271 IX86_BUILTIN_PADDQ128
,
25272 IX86_BUILTIN_PADDSB128
,
25273 IX86_BUILTIN_PADDSW128
,
25274 IX86_BUILTIN_PADDUSB128
,
25275 IX86_BUILTIN_PADDUSW128
,
25276 IX86_BUILTIN_PSUBB128
,
25277 IX86_BUILTIN_PSUBW128
,
25278 IX86_BUILTIN_PSUBD128
,
25279 IX86_BUILTIN_PSUBQ128
,
25280 IX86_BUILTIN_PSUBSB128
,
25281 IX86_BUILTIN_PSUBSW128
,
25282 IX86_BUILTIN_PSUBUSB128
,
25283 IX86_BUILTIN_PSUBUSW128
,
25285 IX86_BUILTIN_PAND128
,
25286 IX86_BUILTIN_PANDN128
,
25287 IX86_BUILTIN_POR128
,
25288 IX86_BUILTIN_PXOR128
,
25290 IX86_BUILTIN_PAVGB128
,
25291 IX86_BUILTIN_PAVGW128
,
25293 IX86_BUILTIN_PCMPEQB128
,
25294 IX86_BUILTIN_PCMPEQW128
,
25295 IX86_BUILTIN_PCMPEQD128
,
25296 IX86_BUILTIN_PCMPGTB128
,
25297 IX86_BUILTIN_PCMPGTW128
,
25298 IX86_BUILTIN_PCMPGTD128
,
25300 IX86_BUILTIN_PMADDWD128
,
25302 IX86_BUILTIN_PMAXSW128
,
25303 IX86_BUILTIN_PMAXUB128
,
25304 IX86_BUILTIN_PMINSW128
,
25305 IX86_BUILTIN_PMINUB128
,
25307 IX86_BUILTIN_PMULUDQ
,
25308 IX86_BUILTIN_PMULUDQ128
,
25309 IX86_BUILTIN_PMULHUW128
,
25310 IX86_BUILTIN_PMULHW128
,
25311 IX86_BUILTIN_PMULLW128
,
25313 IX86_BUILTIN_PSADBW128
,
25314 IX86_BUILTIN_PSHUFHW
,
25315 IX86_BUILTIN_PSHUFLW
,
25316 IX86_BUILTIN_PSHUFD
,
25318 IX86_BUILTIN_PSLLDQI128
,
25319 IX86_BUILTIN_PSLLWI128
,
25320 IX86_BUILTIN_PSLLDI128
,
25321 IX86_BUILTIN_PSLLQI128
,
25322 IX86_BUILTIN_PSRAWI128
,
25323 IX86_BUILTIN_PSRADI128
,
25324 IX86_BUILTIN_PSRLDQI128
,
25325 IX86_BUILTIN_PSRLWI128
,
25326 IX86_BUILTIN_PSRLDI128
,
25327 IX86_BUILTIN_PSRLQI128
,
25329 IX86_BUILTIN_PSLLDQ128
,
25330 IX86_BUILTIN_PSLLW128
,
25331 IX86_BUILTIN_PSLLD128
,
25332 IX86_BUILTIN_PSLLQ128
,
25333 IX86_BUILTIN_PSRAW128
,
25334 IX86_BUILTIN_PSRAD128
,
25335 IX86_BUILTIN_PSRLW128
,
25336 IX86_BUILTIN_PSRLD128
,
25337 IX86_BUILTIN_PSRLQ128
,
25339 IX86_BUILTIN_PUNPCKHBW128
,
25340 IX86_BUILTIN_PUNPCKHWD128
,
25341 IX86_BUILTIN_PUNPCKHDQ128
,
25342 IX86_BUILTIN_PUNPCKHQDQ128
,
25343 IX86_BUILTIN_PUNPCKLBW128
,
25344 IX86_BUILTIN_PUNPCKLWD128
,
25345 IX86_BUILTIN_PUNPCKLDQ128
,
25346 IX86_BUILTIN_PUNPCKLQDQ128
,
25348 IX86_BUILTIN_CLFLUSH
,
25349 IX86_BUILTIN_MFENCE
,
25350 IX86_BUILTIN_LFENCE
,
25351 IX86_BUILTIN_PAUSE
,
25353 IX86_BUILTIN_BSRSI
,
25354 IX86_BUILTIN_BSRDI
,
25355 IX86_BUILTIN_RDPMC
,
25356 IX86_BUILTIN_RDTSC
,
25357 IX86_BUILTIN_RDTSCP
,
25358 IX86_BUILTIN_ROLQI
,
25359 IX86_BUILTIN_ROLHI
,
25360 IX86_BUILTIN_RORQI
,
25361 IX86_BUILTIN_RORHI
,
25364 IX86_BUILTIN_ADDSUBPS
,
25365 IX86_BUILTIN_HADDPS
,
25366 IX86_BUILTIN_HSUBPS
,
25367 IX86_BUILTIN_MOVSHDUP
,
25368 IX86_BUILTIN_MOVSLDUP
,
25369 IX86_BUILTIN_ADDSUBPD
,
25370 IX86_BUILTIN_HADDPD
,
25371 IX86_BUILTIN_HSUBPD
,
25372 IX86_BUILTIN_LDDQU
,
25374 IX86_BUILTIN_MONITOR
,
25375 IX86_BUILTIN_MWAIT
,
25378 IX86_BUILTIN_PHADDW
,
25379 IX86_BUILTIN_PHADDD
,
25380 IX86_BUILTIN_PHADDSW
,
25381 IX86_BUILTIN_PHSUBW
,
25382 IX86_BUILTIN_PHSUBD
,
25383 IX86_BUILTIN_PHSUBSW
,
25384 IX86_BUILTIN_PMADDUBSW
,
25385 IX86_BUILTIN_PMULHRSW
,
25386 IX86_BUILTIN_PSHUFB
,
25387 IX86_BUILTIN_PSIGNB
,
25388 IX86_BUILTIN_PSIGNW
,
25389 IX86_BUILTIN_PSIGND
,
25390 IX86_BUILTIN_PALIGNR
,
25391 IX86_BUILTIN_PABSB
,
25392 IX86_BUILTIN_PABSW
,
25393 IX86_BUILTIN_PABSD
,
25395 IX86_BUILTIN_PHADDW128
,
25396 IX86_BUILTIN_PHADDD128
,
25397 IX86_BUILTIN_PHADDSW128
,
25398 IX86_BUILTIN_PHSUBW128
,
25399 IX86_BUILTIN_PHSUBD128
,
25400 IX86_BUILTIN_PHSUBSW128
,
25401 IX86_BUILTIN_PMADDUBSW128
,
25402 IX86_BUILTIN_PMULHRSW128
,
25403 IX86_BUILTIN_PSHUFB128
,
25404 IX86_BUILTIN_PSIGNB128
,
25405 IX86_BUILTIN_PSIGNW128
,
25406 IX86_BUILTIN_PSIGND128
,
25407 IX86_BUILTIN_PALIGNR128
,
25408 IX86_BUILTIN_PABSB128
,
25409 IX86_BUILTIN_PABSW128
,
25410 IX86_BUILTIN_PABSD128
,
25412 /* AMDFAM10 - SSE4A New Instructions. */
25413 IX86_BUILTIN_MOVNTSD
,
25414 IX86_BUILTIN_MOVNTSS
,
25415 IX86_BUILTIN_EXTRQI
,
25416 IX86_BUILTIN_EXTRQ
,
25417 IX86_BUILTIN_INSERTQI
,
25418 IX86_BUILTIN_INSERTQ
,
25421 IX86_BUILTIN_BLENDPD
,
25422 IX86_BUILTIN_BLENDPS
,
25423 IX86_BUILTIN_BLENDVPD
,
25424 IX86_BUILTIN_BLENDVPS
,
25425 IX86_BUILTIN_PBLENDVB128
,
25426 IX86_BUILTIN_PBLENDW128
,
25431 IX86_BUILTIN_INSERTPS128
,
25433 IX86_BUILTIN_MOVNTDQA
,
25434 IX86_BUILTIN_MPSADBW128
,
25435 IX86_BUILTIN_PACKUSDW128
,
25436 IX86_BUILTIN_PCMPEQQ
,
25437 IX86_BUILTIN_PHMINPOSUW128
,
25439 IX86_BUILTIN_PMAXSB128
,
25440 IX86_BUILTIN_PMAXSD128
,
25441 IX86_BUILTIN_PMAXUD128
,
25442 IX86_BUILTIN_PMAXUW128
,
25444 IX86_BUILTIN_PMINSB128
,
25445 IX86_BUILTIN_PMINSD128
,
25446 IX86_BUILTIN_PMINUD128
,
25447 IX86_BUILTIN_PMINUW128
,
25449 IX86_BUILTIN_PMOVSXBW128
,
25450 IX86_BUILTIN_PMOVSXBD128
,
25451 IX86_BUILTIN_PMOVSXBQ128
,
25452 IX86_BUILTIN_PMOVSXWD128
,
25453 IX86_BUILTIN_PMOVSXWQ128
,
25454 IX86_BUILTIN_PMOVSXDQ128
,
25456 IX86_BUILTIN_PMOVZXBW128
,
25457 IX86_BUILTIN_PMOVZXBD128
,
25458 IX86_BUILTIN_PMOVZXBQ128
,
25459 IX86_BUILTIN_PMOVZXWD128
,
25460 IX86_BUILTIN_PMOVZXWQ128
,
25461 IX86_BUILTIN_PMOVZXDQ128
,
25463 IX86_BUILTIN_PMULDQ128
,
25464 IX86_BUILTIN_PMULLD128
,
25466 IX86_BUILTIN_ROUNDSD
,
25467 IX86_BUILTIN_ROUNDSS
,
25469 IX86_BUILTIN_ROUNDPD
,
25470 IX86_BUILTIN_ROUNDPS
,
25472 IX86_BUILTIN_FLOORPD
,
25473 IX86_BUILTIN_CEILPD
,
25474 IX86_BUILTIN_TRUNCPD
,
25475 IX86_BUILTIN_RINTPD
,
25476 IX86_BUILTIN_ROUNDPD_AZ
,
25478 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
25479 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
25480 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
25482 IX86_BUILTIN_FLOORPS
,
25483 IX86_BUILTIN_CEILPS
,
25484 IX86_BUILTIN_TRUNCPS
,
25485 IX86_BUILTIN_RINTPS
,
25486 IX86_BUILTIN_ROUNDPS_AZ
,
25488 IX86_BUILTIN_FLOORPS_SFIX
,
25489 IX86_BUILTIN_CEILPS_SFIX
,
25490 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
25492 IX86_BUILTIN_PTESTZ
,
25493 IX86_BUILTIN_PTESTC
,
25494 IX86_BUILTIN_PTESTNZC
,
25496 IX86_BUILTIN_VEC_INIT_V2SI
,
25497 IX86_BUILTIN_VEC_INIT_V4HI
,
25498 IX86_BUILTIN_VEC_INIT_V8QI
,
25499 IX86_BUILTIN_VEC_EXT_V2DF
,
25500 IX86_BUILTIN_VEC_EXT_V2DI
,
25501 IX86_BUILTIN_VEC_EXT_V4SF
,
25502 IX86_BUILTIN_VEC_EXT_V4SI
,
25503 IX86_BUILTIN_VEC_EXT_V8HI
,
25504 IX86_BUILTIN_VEC_EXT_V2SI
,
25505 IX86_BUILTIN_VEC_EXT_V4HI
,
25506 IX86_BUILTIN_VEC_EXT_V16QI
,
25507 IX86_BUILTIN_VEC_SET_V2DI
,
25508 IX86_BUILTIN_VEC_SET_V4SF
,
25509 IX86_BUILTIN_VEC_SET_V4SI
,
25510 IX86_BUILTIN_VEC_SET_V8HI
,
25511 IX86_BUILTIN_VEC_SET_V4HI
,
25512 IX86_BUILTIN_VEC_SET_V16QI
,
25514 IX86_BUILTIN_VEC_PACK_SFIX
,
25515 IX86_BUILTIN_VEC_PACK_SFIX256
,
25518 IX86_BUILTIN_CRC32QI
,
25519 IX86_BUILTIN_CRC32HI
,
25520 IX86_BUILTIN_CRC32SI
,
25521 IX86_BUILTIN_CRC32DI
,
25523 IX86_BUILTIN_PCMPESTRI128
,
25524 IX86_BUILTIN_PCMPESTRM128
,
25525 IX86_BUILTIN_PCMPESTRA128
,
25526 IX86_BUILTIN_PCMPESTRC128
,
25527 IX86_BUILTIN_PCMPESTRO128
,
25528 IX86_BUILTIN_PCMPESTRS128
,
25529 IX86_BUILTIN_PCMPESTRZ128
,
25530 IX86_BUILTIN_PCMPISTRI128
,
25531 IX86_BUILTIN_PCMPISTRM128
,
25532 IX86_BUILTIN_PCMPISTRA128
,
25533 IX86_BUILTIN_PCMPISTRC128
,
25534 IX86_BUILTIN_PCMPISTRO128
,
25535 IX86_BUILTIN_PCMPISTRS128
,
25536 IX86_BUILTIN_PCMPISTRZ128
,
25538 IX86_BUILTIN_PCMPGTQ
,
25540 /* AES instructions */
25541 IX86_BUILTIN_AESENC128
,
25542 IX86_BUILTIN_AESENCLAST128
,
25543 IX86_BUILTIN_AESDEC128
,
25544 IX86_BUILTIN_AESDECLAST128
,
25545 IX86_BUILTIN_AESIMC128
,
25546 IX86_BUILTIN_AESKEYGENASSIST128
,
25548 /* PCLMUL instruction */
25549 IX86_BUILTIN_PCLMULQDQ128
,
25552 IX86_BUILTIN_ADDPD256
,
25553 IX86_BUILTIN_ADDPS256
,
25554 IX86_BUILTIN_ADDSUBPD256
,
25555 IX86_BUILTIN_ADDSUBPS256
,
25556 IX86_BUILTIN_ANDPD256
,
25557 IX86_BUILTIN_ANDPS256
,
25558 IX86_BUILTIN_ANDNPD256
,
25559 IX86_BUILTIN_ANDNPS256
,
25560 IX86_BUILTIN_BLENDPD256
,
25561 IX86_BUILTIN_BLENDPS256
,
25562 IX86_BUILTIN_BLENDVPD256
,
25563 IX86_BUILTIN_BLENDVPS256
,
25564 IX86_BUILTIN_DIVPD256
,
25565 IX86_BUILTIN_DIVPS256
,
25566 IX86_BUILTIN_DPPS256
,
25567 IX86_BUILTIN_HADDPD256
,
25568 IX86_BUILTIN_HADDPS256
,
25569 IX86_BUILTIN_HSUBPD256
,
25570 IX86_BUILTIN_HSUBPS256
,
25571 IX86_BUILTIN_MAXPD256
,
25572 IX86_BUILTIN_MAXPS256
,
25573 IX86_BUILTIN_MINPD256
,
25574 IX86_BUILTIN_MINPS256
,
25575 IX86_BUILTIN_MULPD256
,
25576 IX86_BUILTIN_MULPS256
,
25577 IX86_BUILTIN_ORPD256
,
25578 IX86_BUILTIN_ORPS256
,
25579 IX86_BUILTIN_SHUFPD256
,
25580 IX86_BUILTIN_SHUFPS256
,
25581 IX86_BUILTIN_SUBPD256
,
25582 IX86_BUILTIN_SUBPS256
,
25583 IX86_BUILTIN_XORPD256
,
25584 IX86_BUILTIN_XORPS256
,
25585 IX86_BUILTIN_CMPSD
,
25586 IX86_BUILTIN_CMPSS
,
25587 IX86_BUILTIN_CMPPD
,
25588 IX86_BUILTIN_CMPPS
,
25589 IX86_BUILTIN_CMPPD256
,
25590 IX86_BUILTIN_CMPPS256
,
25591 IX86_BUILTIN_CVTDQ2PD256
,
25592 IX86_BUILTIN_CVTDQ2PS256
,
25593 IX86_BUILTIN_CVTPD2PS256
,
25594 IX86_BUILTIN_CVTPS2DQ256
,
25595 IX86_BUILTIN_CVTPS2PD256
,
25596 IX86_BUILTIN_CVTTPD2DQ256
,
25597 IX86_BUILTIN_CVTPD2DQ256
,
25598 IX86_BUILTIN_CVTTPS2DQ256
,
25599 IX86_BUILTIN_EXTRACTF128PD256
,
25600 IX86_BUILTIN_EXTRACTF128PS256
,
25601 IX86_BUILTIN_EXTRACTF128SI256
,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_ROUNDPD_AZ256,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,

  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,
  IX86_BUILTIN_ROUNDPS_AZ256,

  IX86_BUILTIN_FLOORPS_SFIX256,
  IX86_BUILTIN_CEILPS_SFIX256,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  IX86_BUILTIN_MPSADBW256,
  IX86_BUILTIN_PABSB256,
  IX86_BUILTIN_PABSW256,
  IX86_BUILTIN_PABSD256,
  IX86_BUILTIN_PACKSSDW256,
  IX86_BUILTIN_PACKSSWB256,
  IX86_BUILTIN_PACKUSDW256,
  IX86_BUILTIN_PACKUSWB256,
  IX86_BUILTIN_PADDB256,
  IX86_BUILTIN_PADDW256,
  IX86_BUILTIN_PADDD256,
  IX86_BUILTIN_PADDQ256,
  IX86_BUILTIN_PADDSB256,
  IX86_BUILTIN_PADDSW256,
  IX86_BUILTIN_PADDUSB256,
  IX86_BUILTIN_PADDUSW256,
  IX86_BUILTIN_PALIGNR256,
  IX86_BUILTIN_AND256I,
  IX86_BUILTIN_ANDNOT256I,
  IX86_BUILTIN_PAVGB256,
  IX86_BUILTIN_PAVGW256,
  IX86_BUILTIN_PBLENDVB256,
  IX86_BUILTIN_PBLENDVW256,
  IX86_BUILTIN_PCMPEQB256,
  IX86_BUILTIN_PCMPEQW256,
  IX86_BUILTIN_PCMPEQD256,
  IX86_BUILTIN_PCMPEQQ256,
  IX86_BUILTIN_PCMPGTB256,
  IX86_BUILTIN_PCMPGTW256,
  IX86_BUILTIN_PCMPGTD256,
  IX86_BUILTIN_PCMPGTQ256,
  IX86_BUILTIN_PHADDW256,
  IX86_BUILTIN_PHADDD256,
  IX86_BUILTIN_PHADDSW256,
  IX86_BUILTIN_PHSUBW256,
  IX86_BUILTIN_PHSUBD256,
  IX86_BUILTIN_PHSUBSW256,
  IX86_BUILTIN_PMADDUBSW256,
  IX86_BUILTIN_PMADDWD256,
  IX86_BUILTIN_PMAXSB256,
  IX86_BUILTIN_PMAXSW256,
  IX86_BUILTIN_PMAXSD256,
  IX86_BUILTIN_PMAXUB256,
  IX86_BUILTIN_PMAXUW256,
  IX86_BUILTIN_PMAXUD256,
  IX86_BUILTIN_PMINSB256,
  IX86_BUILTIN_PMINSW256,
  IX86_BUILTIN_PMINSD256,
  IX86_BUILTIN_PMINUB256,
  IX86_BUILTIN_PMINUW256,
  IX86_BUILTIN_PMINUD256,
  IX86_BUILTIN_PMOVMSKB256,
  IX86_BUILTIN_PMOVSXBW256,
  IX86_BUILTIN_PMOVSXBD256,
  IX86_BUILTIN_PMOVSXBQ256,
  IX86_BUILTIN_PMOVSXWD256,
  IX86_BUILTIN_PMOVSXWQ256,
  IX86_BUILTIN_PMOVSXDQ256,
  IX86_BUILTIN_PMOVZXBW256,
  IX86_BUILTIN_PMOVZXBD256,
  IX86_BUILTIN_PMOVZXBQ256,
  IX86_BUILTIN_PMOVZXWD256,
  IX86_BUILTIN_PMOVZXWQ256,
  IX86_BUILTIN_PMOVZXDQ256,
  IX86_BUILTIN_PMULDQ256,
  IX86_BUILTIN_PMULHRSW256,
  IX86_BUILTIN_PMULHUW256,
  IX86_BUILTIN_PMULHW256,
  IX86_BUILTIN_PMULLW256,
  IX86_BUILTIN_PMULLD256,
  IX86_BUILTIN_PMULUDQ256,
  IX86_BUILTIN_POR256,
  IX86_BUILTIN_PSADBW256,
  IX86_BUILTIN_PSHUFB256,
  IX86_BUILTIN_PSHUFD256,
  IX86_BUILTIN_PSHUFHW256,
  IX86_BUILTIN_PSHUFLW256,
  IX86_BUILTIN_PSIGNB256,
  IX86_BUILTIN_PSIGNW256,
  IX86_BUILTIN_PSIGND256,
  IX86_BUILTIN_PSLLDQI256,
  IX86_BUILTIN_PSLLWI256,
  IX86_BUILTIN_PSLLW256,
  IX86_BUILTIN_PSLLDI256,
  IX86_BUILTIN_PSLLD256,
  IX86_BUILTIN_PSLLQI256,
  IX86_BUILTIN_PSLLQ256,
  IX86_BUILTIN_PSRAWI256,
  IX86_BUILTIN_PSRAW256,
  IX86_BUILTIN_PSRADI256,
  IX86_BUILTIN_PSRAD256,
  IX86_BUILTIN_PSRLDQI256,
  IX86_BUILTIN_PSRLWI256,
  IX86_BUILTIN_PSRLW256,
  IX86_BUILTIN_PSRLDI256,
  IX86_BUILTIN_PSRLD256,
  IX86_BUILTIN_PSRLQI256,
  IX86_BUILTIN_PSRLQ256,
  IX86_BUILTIN_PSUBB256,
  IX86_BUILTIN_PSUBW256,
  IX86_BUILTIN_PSUBD256,
  IX86_BUILTIN_PSUBQ256,
  IX86_BUILTIN_PSUBSB256,
  IX86_BUILTIN_PSUBSW256,
  IX86_BUILTIN_PSUBUSB256,
  IX86_BUILTIN_PSUBUSW256,
  IX86_BUILTIN_PUNPCKHBW256,
  IX86_BUILTIN_PUNPCKHWD256,
  IX86_BUILTIN_PUNPCKHDQ256,
  IX86_BUILTIN_PUNPCKHQDQ256,
  IX86_BUILTIN_PUNPCKLBW256,
  IX86_BUILTIN_PUNPCKLWD256,
  IX86_BUILTIN_PUNPCKLDQ256,
  IX86_BUILTIN_PUNPCKLQDQ256,
  IX86_BUILTIN_PXOR256,
  IX86_BUILTIN_MOVNTDQA256,
  IX86_BUILTIN_VBROADCASTSS_PS,
  IX86_BUILTIN_VBROADCASTSS_PS256,
  IX86_BUILTIN_VBROADCASTSD_PD256,
  IX86_BUILTIN_VBROADCASTSI256,
  IX86_BUILTIN_PBLENDD256,
  IX86_BUILTIN_PBLENDD128,
  IX86_BUILTIN_PBROADCASTB256,
  IX86_BUILTIN_PBROADCASTW256,
  IX86_BUILTIN_PBROADCASTD256,
  IX86_BUILTIN_PBROADCASTQ256,
  IX86_BUILTIN_PBROADCASTB128,
  IX86_BUILTIN_PBROADCASTW128,
  IX86_BUILTIN_PBROADCASTD128,
  IX86_BUILTIN_PBROADCASTQ128,
  IX86_BUILTIN_VPERMVARSI256,
  IX86_BUILTIN_VPERMDF256,
  IX86_BUILTIN_VPERMVARSF256,
  IX86_BUILTIN_VPERMDI256,
  IX86_BUILTIN_VPERMTI256,
  IX86_BUILTIN_VEXTRACT128I256,
  IX86_BUILTIN_VINSERT128I256,
  IX86_BUILTIN_MASKLOADD,
  IX86_BUILTIN_MASKLOADQ,
  IX86_BUILTIN_MASKLOADD256,
  IX86_BUILTIN_MASKLOADQ256,
  IX86_BUILTIN_MASKSTORED,
  IX86_BUILTIN_MASKSTOREQ,
  IX86_BUILTIN_MASKSTORED256,
  IX86_BUILTIN_MASKSTOREQ256,
  IX86_BUILTIN_PSLLVV4DI,
  IX86_BUILTIN_PSLLVV2DI,
  IX86_BUILTIN_PSLLVV8SI,
  IX86_BUILTIN_PSLLVV4SI,
  IX86_BUILTIN_PSRAVV8SI,
  IX86_BUILTIN_PSRAVV4SI,
  IX86_BUILTIN_PSRLVV4DI,
  IX86_BUILTIN_PSRLVV2DI,
  IX86_BUILTIN_PSRLVV8SI,
  IX86_BUILTIN_PSRLVV4SI,

  IX86_BUILTIN_GATHERSIV2DF,
  IX86_BUILTIN_GATHERSIV4DF,
  IX86_BUILTIN_GATHERDIV2DF,
  IX86_BUILTIN_GATHERDIV4DF,
  IX86_BUILTIN_GATHERSIV4SF,
  IX86_BUILTIN_GATHERSIV8SF,
  IX86_BUILTIN_GATHERDIV4SF,
  IX86_BUILTIN_GATHERDIV8SF,
  IX86_BUILTIN_GATHERSIV2DI,
  IX86_BUILTIN_GATHERSIV4DI,
  IX86_BUILTIN_GATHERDIV2DI,
  IX86_BUILTIN_GATHERDIV4DI,
  IX86_BUILTIN_GATHERSIV4SI,
  IX86_BUILTIN_GATHERSIV8SI,
  IX86_BUILTIN_GATHERDIV4SI,
  IX86_BUILTIN_GATHERDIV8SI,

  /* Alternate 4 element gather for the vectorizer where
     all operands are 32-byte wide.  */
  IX86_BUILTIN_GATHERALTSIV4DF,
  IX86_BUILTIN_GATHERALTDIV8SF,
  IX86_BUILTIN_GATHERALTSIV4DI,
  IX86_BUILTIN_GATHERALTDIV8SI,

  /* TFmode support builtins.  */
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  /* FMA4 instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  /* FMA3 instructions.  */
  IX86_BUILTIN_VFMADDSS3,
  IX86_BUILTIN_VFMADDSD3,

  /* XOP instructions.  */
  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_XBEGIN,
  IX86_BUILTIN_XEND,
  IX86_BUILTIN_XABORT,
  IX86_BUILTIN_XTEST,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* BMI2 instructions.  */
  IX86_BUILTIN_BZHI32,
  IX86_BUILTIN_BZHI64,
  IX86_BUILTIN_PDEP32,
  IX86_BUILTIN_PDEP64,
  IX86_BUILTIN_PEXT32,
  IX86_BUILTIN_PEXT64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin.  */
  IX86_BUILTIN_CFSTRING,

  /* Builtins to get CPU type and supported features.  */
  IX86_BUILTIN_CPU_INIT,
  IX86_BUILTIN_CPU_IS,
  IX86_BUILTIN_CPU_SUPPORTS,

  IX86_BUILTIN_MAX
};
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;                  /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  HOST_WIDE_INT isa;                 /* isa_flags this builtin is defined for */
  bool const_p;                      /* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
             enum ix86_builtin_func_type tcode,
             enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
          || (mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          tree type = ix86_get_builtin_func_type (tcode);
          decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                                       NULL, NULL_TREE);
          ix86_builtins[(int) code] = decl;
          ix86_builtins_isa[(int) code].set_and_not_built_p = false;
        }
      else
        {
          ix86_builtins[(int) code] = NULL_TREE;
          ix86_builtins_isa[(int) code].tcode = tcode;
          ix86_builtins_isa[(int) code].name = name;
          ix86_builtins_isa[(int) code].const_p = false;
          ix86_builtins_isa[(int) code].set_and_not_built_p = true;
        }
    }

  return decl;
}
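
/* Illustrative note (a sketch, not part of the original table-driven code
   below; the builtin chosen here is only an example): registrations made
   through def_builtin elsewhere in this file have roughly this shape.

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
                  VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);

   If the requested ISA bit is not in ix86_isa_flags and the front end has a
   separate ext-scope hook, no decl is built yet; only the ix86_builtins_isa
   bookkeeping entry is filled in, and the decl is created later by
   ix86_add_new_builtins.  */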
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
                   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
          && ix86_builtins_isa[i].set_and_not_built_p)
        {
          tree decl, type;

          /* Don't define the builtin again.  */
          ix86_builtins_isa[i].set_and_not_built_p = false;

          type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
          decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
                                                 type, i, BUILT_IN_MD, NULL,
                                                 NULL_TREE);

          ix86_builtins[i] = decl;
          if (ix86_builtins_isa[i].const_p)
            TREE_READONLY (decl) = 1;
        }
    }
}
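
/* Sketch of the deferred-registration flow described above (illustrative;
   the user code is assumed, not taken from this file): with the ext-scope
   front-end hook, a builtin guarded by an ISA that is disabled at startup
   gets no decl from def_builtin.  When a later function selects that ISA,
   for example

     __attribute__((target ("avx")))
     void use_avx (void) { ... }

   the target-attribute/pragma handling recomputes ix86_isa_flags and calls
   ix86_add_new_builtins with the newly enabled mask, which builds the
   missing decls at that point.  */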
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
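
/* Reading an entry (illustrative): in the comparison table below,

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt",
       IX86_BUILTIN_COMIGTSS, GT, 0 },

   says that __builtin_ia32_comigt is available when SSE is enabled, expands
   through the sse_comi insn pattern, and tests the GT condition; the final
   field is the flag word (BUILTIN_DESC_SWAP_OPERANDS here, or, in the later
   tables, a CC mode or function-type code).  */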
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
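
/* Usage sketch (assumed wrapper shape, for orientation only): the bdesc_comi
   entries back the scalar compare intrinsics; <xmmintrin.h> defines wrappers
   of roughly this form,

     extern __inline int
     _mm_comigt_ss (__m128 __A, __m128 __B)
     {
       return __builtin_ia32_comigt ((__v4sf) __A, (__v4sf) __B);
     }

   so each table row corresponds to one such wrapper.  */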
static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
};
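
/* Usage sketch (assumed wrapper shape, for orientation only): "special"
   builtins take or return pointers and are not pure/const, e.g. the
   unaligned-store entry above is what <xmmintrin.h> reaches through in
   roughly this form,

     extern __inline void
     _mm_storeu_ps (float *__P, __m128 __A)
     {
       __builtin_ia32_storeups (__P, (__v4sf) __A);
     }

   which is why they are kept in a separate table from bdesc_args below.  */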
26430 /* Builtins with variable number of arguments. */
26431 static const struct builtin_description bdesc_args
[] =
26433 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
26434 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
26435 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdpmc
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
26436 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26437 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26438 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26439 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26442 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26443 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26444 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26445 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26446 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26447 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26449 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26450 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26451 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26452 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26453 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26454 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26455 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26456 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26458 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26459 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26461 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26462 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26463 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26464 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26466 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26467 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26468 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26469 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26470 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26471 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26473 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26474 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26475 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26476 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26477 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26478 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26480 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26481 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
26482 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26484 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
26486 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26487 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26488 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
26489 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26490 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26491 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
26493 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26494 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26495 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
26496 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26497 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26498 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
26500 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26501 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26502 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26503 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26506 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
26507 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
26508 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26509 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26511 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26512 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26513 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26514 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26515 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26516 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26517 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26518 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26519 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26520 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26521 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26522 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26523 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26524 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26525 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26528 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
26529 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
26530 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
26531 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26532 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26533 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26536 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26537 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26538 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26539 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26540 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26541 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26542 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
26543 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26544 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
26545 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
26546 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26547 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
26549 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26551 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26552 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26553 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26554 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26555 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26556 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26557 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26558 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26560 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
26561 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26562 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26563 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26564 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26565 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26566 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26567 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26568 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26569 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26570 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26571 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26572 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
26573 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26574 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26575 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26576 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26577 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26578 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26579 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26580 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26581 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26583 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26584 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26585 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26586 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26588 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26589 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26590 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26591 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26593 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26595 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26596 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26597 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26598 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26599 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26601 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
26602 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
26603 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
26605 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
26607 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26608 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26609 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26611 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
26612 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
26614 /* SSE MMX or 3Dnow!A */
26615 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26616 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26617 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26619 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26620 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26621 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26622 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26624 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
26625 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
26627 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
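  /* SSE3 */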
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
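  /* SSSE3 */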
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
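  /* SSE4.1 */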
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
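  /* SSE4.2 */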
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
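  /* SSE4A */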
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
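  /* AES */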
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
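  /* PCLMUL */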
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
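  /* AVX */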
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
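  /* AVX2 */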
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
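  /* LZCNT, BMI, TBM, F16C and BMI2 builtins.  */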
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I    V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1   V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I    V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1   V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF          V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF          V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2         V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2         V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI          V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI          V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI       V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI          V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI       V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI          V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2         V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2         V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2         V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2         V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF          V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF          V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI          V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI          V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI          V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI          V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM      V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM      V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM      V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM      V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP      V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP      V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP      V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP      V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF       V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF       V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF       V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF       V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF       V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF       V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF          V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF          V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2         V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2         V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI          V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI          V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI          V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI          V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI       V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI       V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI       V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI       V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI       V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI       V8HI_FTYPE_V16QI
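/* Each MULTI_ARG_<N>_<MODE> name above encodes the operand count and the
   vector mode of an FMA4/XOP builtin signature; e.g. MULTI_ARG_3_SF is the
   three-operand V4SF (packed float) function type, and the _CMP and _TF
   variants are the forms used by the vpcom* comparison builtins below.  */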
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf, "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df, "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};

/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' cause
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};

/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
	}
    }
  return NULL_TREE;
}

/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
	}
    }
  return NULL_TREE;
}
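/* Note: the two selectors above distinguish vector types only by their
   total size in bits (64/128/256); presumably the generic trans-mem
   lowering falls back to its default instrumentation whenever NULL_TREE
   is returned.  */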
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  if (!flag_tm)
    return;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type, attrs, attrs_type;
	  enum built_in_function code = (enum built_in_function) d->code;

	  ftype = (enum ix86_builtin_func_type) d->flag;
	  type = ix86_get_builtin_func_type (ftype);

	  if (BUILTIN_TM_LOAD_P (code))
	    {
	      attrs = attrs_load;
	      attrs_type = attrs_type_load;
	    }
	  else if (BUILTIN_TM_STORE_P (code))
	    {
	      attrs = attrs_store;
	      attrs_type = attrs_type_store;
	    }
	  else
	    {
	      attrs = attrs_log;
	      attrs_type = attrs_type_log;
	    }
	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
				       /* The builtin without the prefix for
					  calling it directly.  */
				       d->name + strlen ("__builtin_"),
				       attrs);
	  /* add_builtin_function() will set the DECL_ATTRIBUTES, now
	     set the TYPE_ATTRIBUTES.  */
	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

	  set_builtin_decl (code, decl, false);
	}
    }
}
/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, to allow the user to compile particular
   modules with different target specific options that differ from the
   command line options.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
        ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
        ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
        ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
        ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
        ftype = INT_FTYPE_V2DF_V2DF;
      else
        ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
               UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
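/* Usage sketch (illustrative, not part of this file): these two builtins
   read and write the SSE control/status register.  Assuming the standard
   MXCSR bit layout (FTZ is bit 15, DAZ is bit 6), a user program could
   flush denormals to zero like so:

     static void enable_ftz_daz (void)
     {
       unsigned int mxcsr = __builtin_ia32_stmxcsr ();
       __builtin_ia32_ldmxcsr (mxcsr | 0x8040);
     }
*/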
  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
               "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
               IX86_BUILTIN_MASKMOVQ);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
               VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
               VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
                            VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
               VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
               VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
                     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
                     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
                     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
               INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
               INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
               "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
               IX86_BUILTIN_RDRAND64_STEP);
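/* Usage sketch (illustrative, not part of this file): the *_step builtins
   mirror the RDRAND carry-flag protocol; they return nonzero on success and
   store the random value through the pointer, so callers normally retry:

     static int get_random_u32 (unsigned int *out)
     {
       int tries;
       for (tries = 0; tries < 10; tries++)
         if (__builtin_ia32_rdrand32_step (out))
           return 1;
       return 0;   -- hardware failed to deliver entropy
     }
*/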
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
               V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
               IX86_BUILTIN_GATHERSIV2DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
               V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
               IX86_BUILTIN_GATHERSIV4DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
               V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
               IX86_BUILTIN_GATHERDIV2DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
               V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
               IX86_BUILTIN_GATHERDIV4DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
               V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
               IX86_BUILTIN_GATHERSIV4SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
               V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
               IX86_BUILTIN_GATHERSIV8SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
               V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
               IX86_BUILTIN_GATHERDIV4SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
               V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
               IX86_BUILTIN_GATHERDIV8SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
               V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
               IX86_BUILTIN_GATHERSIV2DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
               V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
               IX86_BUILTIN_GATHERSIV4DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
               V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
               IX86_BUILTIN_GATHERDIV2DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
               V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
               IX86_BUILTIN_GATHERDIV4DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
               V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
               IX86_BUILTIN_GATHERSIV4SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
               V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
               IX86_BUILTIN_GATHERSIV8SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
               V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
               IX86_BUILTIN_GATHERDIV4SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
               V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
               IX86_BUILTIN_GATHERDIV8SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
               V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
               IX86_BUILTIN_GATHERALTSIV4DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
               V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
               IX86_BUILTIN_GATHERALTDIV8SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
               V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
               IX86_BUILTIN_GATHERALTSIV4DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
               V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
               IX86_BUILTIN_GATHERALTDIV8SI);
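/* Usage sketch (illustrative, not part of this file): each gather builtin
   takes a pass-through source, a base pointer, a vector of indices, a mask
   and a literal scale, in that order (see the FTYPEs above).  Assuming the
   usual GCC vector typedefs:

     typedef double v4df __attribute__ ((vector_size (32)));
     typedef int    v4si __attribute__ ((vector_size (16)));

     static v4df gather4 (const double *base, v4si idx)
     {
       v4df src  = { 0.0, 0.0, 0.0, 0.0 };
       v4df mask = { -1.0, -1.0, -1.0, -1.0 };   -- sign bit set: all lanes on
       return __builtin_ia32_gathersiv4df (src, base, idx, mask, 8);
     }
*/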
  def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
                     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
                     V4HI_FTYPE_HI_HI_HI_HI,
                     IX86_BUILTIN_VEC_INIT_V4HI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
                     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
                     IX86_BUILTIN_VEC_INIT_V8QI);
  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
                     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
                     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
                     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
                     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
                     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_ext_v4hi",
                     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
                     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
                     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
                     "__builtin_ia32_vec_set_v2di",
                     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
                     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
                     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
                     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_set_v4hi",
                     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
                     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
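/* Usage sketch (illustrative, not part of this file): the vec_ext/vec_set
   builtins are the element-access primitives the x86 intrinsic headers build
   on; the last argument is a constant lane number.  Extracting the low lane
   of a 4-float vector looks like:

     typedef float v4sf __attribute__ ((vector_size (16)));

     static float first_lane (v4sf v)
     {
       return __builtin_ia32_vec_ext_v4sf (v, 0);
     }
*/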
  /* Add FMA4 multi-arg argument instructions */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* This builds the processor_model struct type defined in
   libgcc/config/i386/cpuinfo.c  */

static tree
build_processor_model_struct (void)
{
  const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
                              "__cpu_features"};
  tree field = NULL_TREE, field_chain = NULL_TREE;
  int i;
  tree type = make_node (RECORD_TYPE);

  /* The first 3 fields are unsigned int.  */
  for (i = 0; i < 3; ++i)
    {
      field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
                          get_identifier (field_name[i]), unsigned_type_node);
      if (field_chain != NULL_TREE)
        DECL_CHAIN (field) = field_chain;
      field_chain = field;
    }

  /* The last field is an array of unsigned integers of size one.  */
  field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
                      get_identifier (field_name[3]),
                      build_array_type (unsigned_type_node,
                                        build_index_type (size_one_node)));
  if (field_chain != NULL_TREE)
    DECL_CHAIN (field) = field_chain;
  field_chain = field;

  finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
  return type;
}
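/* For reference (a sketch mirroring libgcc/config/i386/cpuinfo.c rather than
   quoting it): the record built above corresponds to

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };

   which libgcc defines and fills in from CPUID in __cpu_indicator_init.  */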
/* Returns an extern, comdat VAR_DECL of type TYPE and name NAME.  */

static tree
make_var_decl (tree type, const char *name)
{
  tree new_decl;

  new_decl = build_decl (UNKNOWN_LOCATION,
                         VAR_DECL,
                         get_identifier (name),
                         type);

  DECL_EXTERNAL (new_decl) = 1;
  TREE_STATIC (new_decl) = 1;
  TREE_PUBLIC (new_decl) = 1;
  DECL_INITIAL (new_decl) = 0;
  DECL_ARTIFICIAL (new_decl) = 0;
  DECL_PRESERVE_P (new_decl) = 1;

  make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
  assemble_variable (new_decl, 0, 0, 0);

  return new_decl;
}
/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is
   folded into an integer defined in libgcc/config/i386/cpuinfo.c  */

static tree
fold_builtin_cpu (tree fndecl, tree *args)
{
  unsigned int i;
  enum ix86_builtins fn_code = (enum ix86_builtins)
                               DECL_FUNCTION_CODE (fndecl);
  tree param_string_cst = NULL;

  /* This is the order of bit-fields in __processor_features in cpuinfo.c */
  enum processor_features
  {
    F_CMOV = 0,
    F_MMX,
    F_POPCNT,
    F_SSE,
    F_SSE2,
    F_SSE3,
    F_SSSE3,
    F_SSE4_1,
    F_SSE4_2,
    F_AVX,
    F_AVX2,
    F_MAX
  };

  /* These are the values for vendor types and cpu types and subtypes
     in cpuinfo.c.  Cpu types and subtypes should be subtracted by
     the corresponding start value.  */
  enum processor_model
  {
    M_INTEL = 1,
    M_AMD,
    M_CPU_TYPE_START,
    M_INTEL_ATOM,
    M_INTEL_CORE2,
    M_INTEL_COREI7,
    M_AMDFAM10H,
    M_AMDFAM15H,
    M_CPU_SUBTYPE_START,
    M_INTEL_COREI7_NEHALEM,
    M_INTEL_COREI7_WESTMERE,
    M_INTEL_COREI7_SANDYBRIDGE,
    M_AMDFAM10H_BARCELONA,
    M_AMDFAM10H_SHANGHAI,
    M_AMDFAM10H_ISTANBUL,
    M_AMDFAM15H_BDVER1,
    M_AMDFAM15H_BDVER2
  };

  static struct _arch_names_table
    {
      const char *const name;
      const enum processor_model model;
    }
  const arch_names_table[] =
    {
      {"amd", M_AMD},
      {"intel", M_INTEL},
      {"atom", M_INTEL_ATOM},
      {"core2", M_INTEL_CORE2},
      {"corei7", M_INTEL_COREI7},
      {"nehalem", M_INTEL_COREI7_NEHALEM},
      {"westmere", M_INTEL_COREI7_WESTMERE},
      {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
      {"amdfam10h", M_AMDFAM10H},
      {"barcelona", M_AMDFAM10H_BARCELONA},
      {"shanghai", M_AMDFAM10H_SHANGHAI},
      {"istanbul", M_AMDFAM10H_ISTANBUL},
      {"amdfam15h", M_AMDFAM15H},
      {"bdver1", M_AMDFAM15H_BDVER1},
      {"bdver2", M_AMDFAM15H_BDVER2},
    };

  static struct _isa_names_table
    {
      const char *const name;
      const enum processor_features feature;
    }
  const isa_names_table[] =
    {
      {"cmov",   F_CMOV},
      {"mmx",    F_MMX},
      {"popcnt", F_POPCNT},
      {"sse",    F_SSE},
      {"sse2",   F_SSE2},
      {"sse3",   F_SSE3},
      {"ssse3",  F_SSSE3},
      {"sse4.1", F_SSE4_1},
      {"sse4.2", F_SSE4_2},
      {"avx",    F_AVX},
      {"avx2",   F_AVX2}
    };

  static tree __processor_model_type = NULL_TREE;
  static tree __cpu_model_var = NULL_TREE;

  if (__processor_model_type == NULL_TREE)
    __processor_model_type = build_processor_model_struct ();

  if (__cpu_model_var == NULL_TREE)
    __cpu_model_var = make_var_decl (__processor_model_type,
                                     "__cpu_model");

  gcc_assert ((args != NULL) && (*args != NULL));

  param_string_cst = *args;
  while (param_string_cst
         && TREE_CODE (param_string_cst) != STRING_CST)
    {
      /* *args must be an expr that can contain other EXPRS leading to a
         STRING_CST.  */
      if (!EXPR_P (param_string_cst))
        {
          error ("Parameter to builtin must be a string constant or literal");
          return integer_zero_node;
        }
      param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
    }

  gcc_assert (param_string_cst);

  if (fn_code == IX86_BUILTIN_CPU_IS)
    {
      tree ref;
      tree field;
      unsigned int field_val = 0;
      unsigned int NUM_ARCH_NAMES
        = sizeof (arch_names_table) / sizeof (struct _arch_names_table);

      for (i = 0; i < NUM_ARCH_NAMES; i++)
        if (strcmp (arch_names_table[i].name,
                    TREE_STRING_POINTER (param_string_cst)) == 0)
          break;

      if (i == NUM_ARCH_NAMES)
        {
          error ("Parameter to builtin not valid: %s",
                 TREE_STRING_POINTER (param_string_cst));
          return integer_zero_node;
        }

      field = TYPE_FIELDS (__processor_model_type);
      field_val = arch_names_table[i].model;

      /* CPU types are stored in the next field.  */
      if (field_val > M_CPU_TYPE_START
          && field_val < M_CPU_SUBTYPE_START)
        {
          field = DECL_CHAIN (field);
          field_val -= M_CPU_TYPE_START;
        }

      /* CPU subtypes are stored in the next field.  */
      if (field_val > M_CPU_SUBTYPE_START)
        {
          field = DECL_CHAIN (DECL_CHAIN (field));
          field_val -= M_CPU_SUBTYPE_START;
        }

      /* Get the appropriate field in __cpu_model.  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
                    field, NULL_TREE);

      /* Check the value.  */
      return build2 (EQ_EXPR, unsigned_type_node, ref,
                     build_int_cstu (unsigned_type_node, field_val));
    }
  else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
    {
      tree ref;
      tree field;
      tree array_elt;
      unsigned int field_val = 0;
      unsigned int NUM_ISA_NAMES
        = sizeof (isa_names_table) / sizeof (struct _isa_names_table);

      for (i = 0; i < NUM_ISA_NAMES; i++)
        if (strcmp (isa_names_table[i].name,
                    TREE_STRING_POINTER (param_string_cst)) == 0)
          break;

      if (i == NUM_ISA_NAMES)
        {
          error ("Parameter to builtin not valid: %s",
                 TREE_STRING_POINTER (param_string_cst));
          return integer_zero_node;
        }

      field = TYPE_FIELDS (__processor_model_type);
      /* Get the last field, which is __cpu_features.  */
      while (DECL_CHAIN (field))
        field = DECL_CHAIN (field);

      /* Get the appropriate field: __cpu_model.__cpu_features  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
                    field, NULL_TREE);

      /* Access the 0th element of __cpu_features array.  */
      array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
                          integer_zero_node, NULL_TREE, NULL_TREE);

      field_val = (1 << isa_names_table[i].feature);
      /* Return __cpu_model.__cpu_features[0] & field_val  */
      return build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
                     build_int_cstu (unsigned_type_node, field_val));
    }
  gcc_unreachable ();
}
static tree
ix86_fold_builtin (tree fndecl, int n_args,
                   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ix86_builtins fn_code = (enum ix86_builtins)
                                   DECL_FUNCTION_CODE (fndecl);
      if (fn_code == IX86_BUILTIN_CPU_IS
          || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
        {
          gcc_assert (n_args == 1);
          return fold_builtin_cpu (fndecl, args);
        }
    }

#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}
/* Make builtins to detect cpu type and features supported.  NAME is
   the builtin name, CODE is the builtin code, and FTYPE is the function
   type of the builtin.  */

static void
make_cpu_type_builtin (const char* name, int code,
                       enum ix86_builtin_func_type ftype, bool is_const)
{
  tree decl;
  tree type;

  type = ix86_get_builtin_func_type (ftype);
  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                               NULL, NULL_TREE);
  gcc_assert (decl != NULL_TREE);
  ix86_builtins[(int) code] = decl;
  TREE_READONLY (decl) = is_const;
}
/* Make builtins to get CPU type and features supported.  The created
   builtins are :

   __builtin_cpu_init (), to detect cpu type and features,
   __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
   __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
   */

static void
ix86_init_platform_type_builtins (void)
{
  make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
                         INT_FTYPE_VOID, false);
  make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
                         INT_FTYPE_PCCHAR, true);
  make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
                         INT_FTYPE_PCCHAR, true);
}
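/* Usage sketch (illustrative, not part of this file): these builtins are
   intended for function-selection code such as ifunc resolvers.  A caller
   might pick an implementation like this:

     static int pick_impl (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("corei7") && __builtin_cpu_supports ("sse4.2"))
         return 2;
       if (__builtin_cpu_supports ("popcnt"))
         return 1;
       return 0;
     }

   __builtin_cpu_is and __builtin_cpu_supports fold into reads of the
   __cpu_model variable, as done by fold_builtin_cpu above.  */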
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
                                      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
                              NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
                              sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* Builtins to get CPU type and features.  */
  ix86_init_platform_type_builtins ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
                            BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
                            BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();

  ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}
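/* Usage sketch (illustrative, not part of this file): __builtin_infq,
   __builtin_fabsq and __builtin_copysignq operate on the __float128 type
   registered in ix86_init_builtin_types, e.g.:

     __float128 clamp_magnitude (__float128 x, __float128 limit)
     {
       if (__builtin_fabsq (x) > limit)
         return __builtin_copysignq (limit, x);
       return x;
     }
*/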
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}
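/* Usage sketch (illustrative, not part of this file): two-operand vector
   builtins such as __builtin_ia32_paddw128 (V8HI_FTYPE_V8HI_V8HI) reach this
   expander via ix86_expand_args_builtin, e.g.:

     typedef short v8hi __attribute__ ((vector_size (16)));

     static v8hi add8 (v8hi a, v8hi b)
     {
       return __builtin_ia32_paddw128 (a, b);
     }
*/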
28461 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
28464 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
28465 enum ix86_builtin_func_type m_type
,
28466 enum rtx_code sub_code
)
28471 bool comparison_p
= false;
28473 bool last_arg_constant
= false;
28474 int num_memory
= 0;
28477 enum machine_mode mode
;
28480 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
28484 case MULTI_ARG_4_DF2_DI_I
:
28485 case MULTI_ARG_4_DF2_DI_I1
:
28486 case MULTI_ARG_4_SF2_SI_I
:
28487 case MULTI_ARG_4_SF2_SI_I1
:
28489 last_arg_constant
= true;
28492 case MULTI_ARG_3_SF
:
28493 case MULTI_ARG_3_DF
:
28494 case MULTI_ARG_3_SF2
:
28495 case MULTI_ARG_3_DF2
:
28496 case MULTI_ARG_3_DI
:
28497 case MULTI_ARG_3_SI
:
28498 case MULTI_ARG_3_SI_DI
:
28499 case MULTI_ARG_3_HI
:
28500 case MULTI_ARG_3_HI_SI
:
28501 case MULTI_ARG_3_QI
:
28502 case MULTI_ARG_3_DI2
:
28503 case MULTI_ARG_3_SI2
:
28504 case MULTI_ARG_3_HI2
:
28505 case MULTI_ARG_3_QI2
:
28509 case MULTI_ARG_2_SF
:
28510 case MULTI_ARG_2_DF
:
28511 case MULTI_ARG_2_DI
:
28512 case MULTI_ARG_2_SI
:
28513 case MULTI_ARG_2_HI
:
28514 case MULTI_ARG_2_QI
:
28518 case MULTI_ARG_2_DI_IMM
:
28519 case MULTI_ARG_2_SI_IMM
:
28520 case MULTI_ARG_2_HI_IMM
:
28521 case MULTI_ARG_2_QI_IMM
:
28523 last_arg_constant
= true;
28526 case MULTI_ARG_1_SF
:
28527 case MULTI_ARG_1_DF
:
28528 case MULTI_ARG_1_SF2
:
28529 case MULTI_ARG_1_DF2
:
28530 case MULTI_ARG_1_DI
:
28531 case MULTI_ARG_1_SI
:
28532 case MULTI_ARG_1_HI
:
28533 case MULTI_ARG_1_QI
:
28534 case MULTI_ARG_1_SI_DI
:
28535 case MULTI_ARG_1_HI_DI
:
28536 case MULTI_ARG_1_HI_SI
:
28537 case MULTI_ARG_1_QI_DI
:
28538 case MULTI_ARG_1_QI_SI
:
28539 case MULTI_ARG_1_QI_HI
:
28543 case MULTI_ARG_2_DI_CMP
:
28544 case MULTI_ARG_2_SI_CMP
:
28545 case MULTI_ARG_2_HI_CMP
:
28546 case MULTI_ARG_2_QI_CMP
:
28548 comparison_p
= true;
28551 case MULTI_ARG_2_SF_TF
:
28552 case MULTI_ARG_2_DF_TF
:
28553 case MULTI_ARG_2_DI_TF
:
28554 case MULTI_ARG_2_SI_TF
:
28555 case MULTI_ARG_2_HI_TF
:
28556 case MULTI_ARG_2_QI_TF
:
28562 gcc_unreachable ();
28565 if (optimize
|| !target
28566 || GET_MODE (target
) != tmode
28567 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
28568 target
= gen_reg_rtx (tmode
);
28570 gcc_assert (nargs
<= 4);
28572 for (i
= 0; i
< nargs
; i
++)
28574 tree arg
= CALL_EXPR_ARG (exp
, i
);
28575 rtx op
= expand_normal (arg
);
28576 int adjust
= (comparison_p
) ? 1 : 0;
28577 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
28579 if (last_arg_constant
&& i
== nargs
- 1)
28581 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
28583 enum insn_code new_icode
= icode
;
28586 case CODE_FOR_xop_vpermil2v2df3
:
28587 case CODE_FOR_xop_vpermil2v4sf3
:
28588 case CODE_FOR_xop_vpermil2v4df3
:
28589 case CODE_FOR_xop_vpermil2v8sf3
:
28590 error ("the last argument must be a 2-bit immediate");
28591 return gen_reg_rtx (tmode
);
28592 case CODE_FOR_xop_rotlv2di3
:
28593 new_icode
= CODE_FOR_rotlv2di3
;
28595 case CODE_FOR_xop_rotlv4si3
:
28596 new_icode
= CODE_FOR_rotlv4si3
;
28598 case CODE_FOR_xop_rotlv8hi3
:
28599 new_icode
= CODE_FOR_rotlv8hi3
;
28601 case CODE_FOR_xop_rotlv16qi3
:
28602 new_icode
= CODE_FOR_rotlv16qi3
;
28604 if (CONST_INT_P (op
))
28606 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
28607 op
= GEN_INT (INTVAL (op
) & mask
);
28608 gcc_checking_assert
28609 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
28613 gcc_checking_assert
28615 && insn_data
[new_icode
].operand
[0].mode
== tmode
28616 && insn_data
[new_icode
].operand
[1].mode
== tmode
28617 && insn_data
[new_icode
].operand
[2].mode
== mode
28618 && insn_data
[new_icode
].operand
[0].predicate
28619 == insn_data
[icode
].operand
[0].predicate
28620 && insn_data
[new_icode
].operand
[1].predicate
28621 == insn_data
[icode
].operand
[1].predicate
);
28627 gcc_unreachable ();
28634 if (VECTOR_MODE_P (mode
))
28635 op
= safe_vector_operand (op
, mode
);
28637 /* If we aren't optimizing, only allow one memory operand to be
28639 if (memory_operand (op
, mode
))
28642 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
28645 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
28647 op
= force_reg (mode
, op
);
28651 args
[i
].mode
= mode
;
28657 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
28662 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
28663 GEN_INT ((int)sub_code
));
28664 else if (! comparison_p
)
28665 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
28668 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
28672 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
28677 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
28681 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
28685 gcc_unreachable ();
28695 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
28696 insns with vec_merge. */
28699 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
28703 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28704 rtx op1
, op0
= expand_normal (arg0
);
28705 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
28706 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
28708 if (optimize
|| !target
28709 || GET_MODE (target
) != tmode
28710 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
28711 target
= gen_reg_rtx (tmode
);
28713 if (VECTOR_MODE_P (mode0
))
28714 op0
= safe_vector_operand (op0
, mode0
);
28716 if ((optimize
&& !register_operand (op0
, mode0
))
28717 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
28718 op0
= copy_to_mode_reg (mode0
, op0
);
28721 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
28722 op1
= copy_to_mode_reg (mode0
, op1
);
28724 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
28731 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
28734 ix86_expand_sse_compare (const struct builtin_description
*d
,
28735 tree exp
, rtx target
, bool swap
)
28738 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28739 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28740 rtx op0
= expand_normal (arg0
);
28741 rtx op1
= expand_normal (arg1
);
28743 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
28744 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
28745 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
28746 enum rtx_code comparison
= d
->comparison
;
28748 if (VECTOR_MODE_P (mode0
))
28749 op0
= safe_vector_operand (op0
, mode0
);
28750 if (VECTOR_MODE_P (mode1
))
28751 op1
= safe_vector_operand (op1
, mode1
);
28753 /* Swap operands if we have a comparison that isn't available in
28757 rtx tmp
= gen_reg_rtx (mode1
);
28758 emit_move_insn (tmp
, op1
);
28763 if (optimize
|| !target
28764 || GET_MODE (target
) != tmode
28765 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
28766 target
= gen_reg_rtx (tmode
);
28768 if ((optimize
&& !register_operand (op0
, mode0
))
28769 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
28770 op0
= copy_to_mode_reg (mode0
, op0
);
28771 if ((optimize
&& !register_operand (op1
, mode1
))
28772 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
28773 op1
= copy_to_mode_reg (mode1
, op1
);
28775 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
28776 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
28783 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
28786 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
28790 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28791 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28792 rtx op0
= expand_normal (arg0
);
28793 rtx op1
= expand_normal (arg1
);
28794 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
28795 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
28796 enum rtx_code comparison
= d
->comparison
;
28798 if (VECTOR_MODE_P (mode0
))
28799 op0
= safe_vector_operand (op0
, mode0
);
28800 if (VECTOR_MODE_P (mode1
))
28801 op1
= safe_vector_operand (op1
, mode1
);
28803 /* Swap operands if we have a comparison that isn't available in
28805 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
28812 target
= gen_reg_rtx (SImode
);
28813 emit_move_insn (target
, const0_rtx
);
28814 target
= gen_rtx_SUBREG (QImode
, target
, 0);
28816 if ((optimize
&& !register_operand (op0
, mode0
))
28817 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
28818 op0
= copy_to_mode_reg (mode0
, op0
);
28819 if ((optimize
&& !register_operand (op1
, mode1
))
28820 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
28821 op1
= copy_to_mode_reg (mode1
, op1
);
28823 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
28827 emit_insn (gen_rtx_SET (VOIDmode
,
28828 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
28829 gen_rtx_fmt_ee (comparison
, QImode
,
28833 return SUBREG_REG (target
);
28836 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
28839 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
28843 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28844 rtx op1
, op0
= expand_normal (arg0
);
28845 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
28846 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
28848 if (optimize
|| target
== 0
28849 || GET_MODE (target
) != tmode
28850 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
28851 target
= gen_reg_rtx (tmode
);
28853 if (VECTOR_MODE_P (mode0
))
28854 op0
= safe_vector_operand (op0
, mode0
);
28856 if ((optimize
&& !register_operand (op0
, mode0
))
28857 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
28858 op0
= copy_to_mode_reg (mode0
, op0
);
28860 op1
= GEN_INT (d
->comparison
);
28862 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
28870 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
28871 tree exp
, rtx target
)
28874 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28875 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28876 rtx op0
= expand_normal (arg0
);
28877 rtx op1
= expand_normal (arg1
);
28879 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
28880 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
28881 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
28883 if (optimize
|| target
== 0
28884 || GET_MODE (target
) != tmode
28885 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
28886 target
= gen_reg_rtx (tmode
);
28888 op0
= safe_vector_operand (op0
, mode0
);
28889 op1
= safe_vector_operand (op1
, mode1
);
28891 if ((optimize
&& !register_operand (op0
, mode0
))
28892 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
28893 op0
= copy_to_mode_reg (mode0
, op0
);
28894 if ((optimize
&& !register_operand (op1
, mode1
))
28895 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
28896 op1
= copy_to_mode_reg (mode1
, op1
);
28898 op2
= GEN_INT (d
->comparison
);
28900 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
28907 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
28910 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
28914 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28915 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28916 rtx op0
= expand_normal (arg0
);
28917 rtx op1
= expand_normal (arg1
);
28918 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
28919 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
28920 enum rtx_code comparison
= d
->comparison
;
28922 if (VECTOR_MODE_P (mode0
))
28923 op0
= safe_vector_operand (op0
, mode0
);
28924 if (VECTOR_MODE_P (mode1
))
28925 op1
= safe_vector_operand (op1
, mode1
);
28927 target
= gen_reg_rtx (SImode
);
28928 emit_move_insn (target
, const0_rtx
);
28929 target
= gen_rtx_SUBREG (QImode
, target
, 0);
28931 if ((optimize
&& !register_operand (op0
, mode0
))
28932 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
28933 op0
= copy_to_mode_reg (mode0
, op0
);
28934 if ((optimize
&& !register_operand (op1
, mode1
))
28935 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
28936 op1
= copy_to_mode_reg (mode1
, op1
);
28938 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
28942 emit_insn (gen_rtx_SET (VOIDmode
,
28943 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
28944 gen_rtx_fmt_ee (comparison
, QImode
,
28948 return SUBREG_REG (target
);
28951 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
28954 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
28955 tree exp
, rtx target
)
28958 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28959 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28960 tree arg2
= CALL_EXPR_ARG (exp
, 2);
28961 tree arg3
= CALL_EXPR_ARG (exp
, 3);
28962 tree arg4
= CALL_EXPR_ARG (exp
, 4);
28963 rtx scratch0
, scratch1
;
28964 rtx op0
= expand_normal (arg0
);
28965 rtx op1
= expand_normal (arg1
);
28966 rtx op2
= expand_normal (arg2
);
28967 rtx op3
= expand_normal (arg3
);
28968 rtx op4
= expand_normal (arg4
);
28969 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
28971 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
28972 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
28973 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
28974 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
28975 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
28976 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
28977 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
28979 if (VECTOR_MODE_P (modev2
))
28980 op0
= safe_vector_operand (op0
, modev2
);
28981 if (VECTOR_MODE_P (modev4
))
28982 op2
= safe_vector_operand (op2
, modev4
);
28984 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
28985 op0
= copy_to_mode_reg (modev2
, op0
);
28986 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
28987 op1
= copy_to_mode_reg (modei3
, op1
);
28988 if ((optimize
&& !register_operand (op2
, modev4
))
28989 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
28990 op2
= copy_to_mode_reg (modev4
, op2
);
28991 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
28992 op3
= copy_to_mode_reg (modei5
, op3
);
28994 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
28996 error ("the fifth argument must be an 8-bit immediate");
29000 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
29002 if (optimize
|| !target
29003 || GET_MODE (target
) != tmode0
29004 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
29005 target
= gen_reg_rtx (tmode0
);
29007 scratch1
= gen_reg_rtx (tmode1
);
29009 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
29011 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
29013 if (optimize
|| !target
29014 || GET_MODE (target
) != tmode1
29015 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
29016 target
= gen_reg_rtx (tmode1
);
29018 scratch0
= gen_reg_rtx (tmode0
);
29020 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
29024 gcc_assert (d
->flag
);
29026 scratch0
= gen_reg_rtx (tmode0
);
29027 scratch1
= gen_reg_rtx (tmode1
);
29029 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
29039 target
= gen_reg_rtx (SImode
);
29040 emit_move_insn (target
, const0_rtx
);
29041 target
= gen_rtx_SUBREG (QImode
, target
, 0);
29044 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
29045 gen_rtx_fmt_ee (EQ
, QImode
,
29046 gen_rtx_REG ((enum machine_mode
) d
->flag
,
29049 return SUBREG_REG (target
);
29056 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
29059 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
29060 tree exp
, rtx target
)
29063 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29064 tree arg1
= CALL_EXPR_ARG (exp
, 1);
29065 tree arg2
= CALL_EXPR_ARG (exp
, 2);
29066 rtx scratch0
, scratch1
;
29067 rtx op0
= expand_normal (arg0
);
29068 rtx op1
= expand_normal (arg1
);
29069 rtx op2
= expand_normal (arg2
);
29070 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
29072 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
29073 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
29074 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
29075 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
29076 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
29078 if (VECTOR_MODE_P (modev2
))
29079 op0
= safe_vector_operand (op0
, modev2
);
29080 if (VECTOR_MODE_P (modev3
))
29081 op1
= safe_vector_operand (op1
, modev3
);
29083 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
29084 op0
= copy_to_mode_reg (modev2
, op0
);
29085 if ((optimize
&& !register_operand (op1
, modev3
))
29086 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
29087 op1
= copy_to_mode_reg (modev3
, op1
);
29089 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
29091 error ("the third argument must be an 8-bit immediate");
29095 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
29097 if (optimize
|| !target
29098 || GET_MODE (target
) != tmode0
29099 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
29100 target
= gen_reg_rtx (tmode0
);
29102 scratch1
= gen_reg_rtx (tmode1
);
29104 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
29106 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
29108 if (optimize
|| !target
29109 || GET_MODE (target
) != tmode1
29110 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
29111 target
= gen_reg_rtx (tmode1
);
29113 scratch0
= gen_reg_rtx (tmode0
);
29115 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
29119 gcc_assert (d
->flag
);
29121 scratch0
= gen_reg_rtx (tmode0
);
29122 scratch1
= gen_reg_rtx (tmode1
);
29124 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
29134 target
= gen_reg_rtx (SImode
);
29135 emit_move_insn (target
, const0_rtx
);
29136 target
= gen_rtx_SUBREG (QImode
, target
, 0);
29139 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
29140 gen_rtx_fmt_ee (EQ
, QImode
,
29141 gen_rtx_REG ((enum machine_mode
) d
->flag
,
29144 return SUBREG_REG (target
);
29150 /* Subroutine of ix86_expand_builtin to take care of insns with
29151 variable number of operands. */
29154 ix86_expand_args_builtin (const struct builtin_description
*d
,
29155 tree exp
, rtx target
)
29157 rtx pat
, real_target
;
29158 unsigned int i
, nargs
;
29159 unsigned int nargs_constant
= 0;
29160 int num_memory
= 0;
29164 enum machine_mode mode
;
29166 bool last_arg_count
= false;
29167 enum insn_code icode
= d
->icode
;
29168 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
29169 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
29170 enum machine_mode rmode
= VOIDmode
;
29172 enum rtx_code comparison
= d
->comparison
;
29174 switch ((enum ix86_builtin_func_type
) d
->flag
)
29176 case V2DF_FTYPE_V2DF_ROUND
:
29177 case V4DF_FTYPE_V4DF_ROUND
:
29178 case V4SF_FTYPE_V4SF_ROUND
:
29179 case V8SF_FTYPE_V8SF_ROUND
:
29180 case V4SI_FTYPE_V4SF_ROUND
:
29181 case V8SI_FTYPE_V8SF_ROUND
:
29182 return ix86_expand_sse_round (d
, exp
, target
);
29183 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
29184 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
29185 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
29186 case INT_FTYPE_V8SF_V8SF_PTEST
:
29187 case INT_FTYPE_V4DI_V4DI_PTEST
:
29188 case INT_FTYPE_V4DF_V4DF_PTEST
:
29189 case INT_FTYPE_V4SF_V4SF_PTEST
:
29190 case INT_FTYPE_V2DI_V2DI_PTEST
:
29191 case INT_FTYPE_V2DF_V2DF_PTEST
:
29192 return ix86_expand_sse_ptest (d
, exp
, target
);
29193 case FLOAT128_FTYPE_FLOAT128
:
29194 case FLOAT_FTYPE_FLOAT
:
29195 case INT_FTYPE_INT
:
29196 case UINT64_FTYPE_INT
:
29197 case UINT16_FTYPE_UINT16
:
29198 case INT64_FTYPE_INT64
:
29199 case INT64_FTYPE_V4SF
:
29200 case INT64_FTYPE_V2DF
:
29201 case INT_FTYPE_V16QI
:
29202 case INT_FTYPE_V8QI
:
29203 case INT_FTYPE_V8SF
:
29204 case INT_FTYPE_V4DF
:
29205 case INT_FTYPE_V4SF
:
29206 case INT_FTYPE_V2DF
:
29207 case INT_FTYPE_V32QI
:
29208 case V16QI_FTYPE_V16QI
:
29209 case V8SI_FTYPE_V8SF
:
29210 case V8SI_FTYPE_V4SI
:
29211 case V8HI_FTYPE_V8HI
:
29212 case V8HI_FTYPE_V16QI
:
29213 case V8QI_FTYPE_V8QI
:
29214 case V8SF_FTYPE_V8SF
:
29215 case V8SF_FTYPE_V8SI
:
29216 case V8SF_FTYPE_V4SF
:
29217 case V8SF_FTYPE_V8HI
:
29218 case V4SI_FTYPE_V4SI
:
29219 case V4SI_FTYPE_V16QI
:
29220 case V4SI_FTYPE_V4SF
:
29221 case V4SI_FTYPE_V8SI
:
29222 case V4SI_FTYPE_V8HI
:
29223 case V4SI_FTYPE_V4DF
:
29224 case V4SI_FTYPE_V2DF
:
29225 case V4HI_FTYPE_V4HI
:
29226 case V4DF_FTYPE_V4DF
:
29227 case V4DF_FTYPE_V4SI
:
29228 case V4DF_FTYPE_V4SF
:
29229 case V4DF_FTYPE_V2DF
:
29230 case V4SF_FTYPE_V4SF
:
29231 case V4SF_FTYPE_V4SI
:
29232 case V4SF_FTYPE_V8SF
:
29233 case V4SF_FTYPE_V4DF
:
29234 case V4SF_FTYPE_V8HI
:
29235 case V4SF_FTYPE_V2DF
:
29236 case V2DI_FTYPE_V2DI
:
29237 case V2DI_FTYPE_V16QI
:
29238 case V2DI_FTYPE_V8HI
:
29239 case V2DI_FTYPE_V4SI
:
29240 case V2DF_FTYPE_V2DF
:
29241 case V2DF_FTYPE_V4SI
:
29242 case V2DF_FTYPE_V4DF
:
29243 case V2DF_FTYPE_V4SF
:
29244 case V2DF_FTYPE_V2SI
:
29245 case V2SI_FTYPE_V2SI
:
29246 case V2SI_FTYPE_V4SF
:
29247 case V2SI_FTYPE_V2SF
:
29248 case V2SI_FTYPE_V2DF
:
29249 case V2SF_FTYPE_V2SF
:
29250 case V2SF_FTYPE_V2SI
:
29251 case V32QI_FTYPE_V32QI
:
29252 case V32QI_FTYPE_V16QI
:
29253 case V16HI_FTYPE_V16HI
:
29254 case V16HI_FTYPE_V8HI
:
29255 case V8SI_FTYPE_V8SI
:
29256 case V16HI_FTYPE_V16QI
:
29257 case V8SI_FTYPE_V16QI
:
29258 case V4DI_FTYPE_V16QI
:
29259 case V8SI_FTYPE_V8HI
:
29260 case V4DI_FTYPE_V8HI
:
29261 case V4DI_FTYPE_V4SI
:
29262 case V4DI_FTYPE_V2DI
:
29265 case V4SF_FTYPE_V4SF_VEC_MERGE
:
29266 case V2DF_FTYPE_V2DF_VEC_MERGE
:
29267 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
29268 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
29269 case V16QI_FTYPE_V16QI_V16QI
:
29270 case V16QI_FTYPE_V8HI_V8HI
:
29271 case V8QI_FTYPE_V8QI_V8QI
:
29272 case V8QI_FTYPE_V4HI_V4HI
:
29273 case V8HI_FTYPE_V8HI_V8HI
:
29274 case V8HI_FTYPE_V16QI_V16QI
:
29275 case V8HI_FTYPE_V4SI_V4SI
:
29276 case V8SF_FTYPE_V8SF_V8SF
:
29277 case V8SF_FTYPE_V8SF_V8SI
:
29278 case V4SI_FTYPE_V4SI_V4SI
:
29279 case V4SI_FTYPE_V8HI_V8HI
:
29280 case V4SI_FTYPE_V4SF_V4SF
:
29281 case V4SI_FTYPE_V2DF_V2DF
:
29282 case V4HI_FTYPE_V4HI_V4HI
:
29283 case V4HI_FTYPE_V8QI_V8QI
:
29284 case V4HI_FTYPE_V2SI_V2SI
:
29285 case V4DF_FTYPE_V4DF_V4DF
:
29286 case V4DF_FTYPE_V4DF_V4DI
:
29287 case V4SF_FTYPE_V4SF_V4SF
:
29288 case V4SF_FTYPE_V4SF_V4SI
:
29289 case V4SF_FTYPE_V4SF_V2SI
:
29290 case V4SF_FTYPE_V4SF_V2DF
:
29291 case V4SF_FTYPE_V4SF_DI
:
29292 case V4SF_FTYPE_V4SF_SI
:
29293 case V2DI_FTYPE_V2DI_V2DI
:
29294 case V2DI_FTYPE_V16QI_V16QI
:
29295 case V2DI_FTYPE_V4SI_V4SI
:
29296 case V2UDI_FTYPE_V4USI_V4USI
:
29297 case V2DI_FTYPE_V2DI_V16QI
:
29298 case V2DI_FTYPE_V2DF_V2DF
:
29299 case V2SI_FTYPE_V2SI_V2SI
:
29300 case V2SI_FTYPE_V4HI_V4HI
:
29301 case V2SI_FTYPE_V2SF_V2SF
:
29302 case V2DF_FTYPE_V2DF_V2DF
:
29303 case V2DF_FTYPE_V2DF_V4SF
:
29304 case V2DF_FTYPE_V2DF_V2DI
:
29305 case V2DF_FTYPE_V2DF_DI
:
29306 case V2DF_FTYPE_V2DF_SI
:
29307 case V2SF_FTYPE_V2SF_V2SF
:
29308 case V1DI_FTYPE_V1DI_V1DI
:
29309 case V1DI_FTYPE_V8QI_V8QI
:
29310 case V1DI_FTYPE_V2SI_V2SI
:
29311 case V32QI_FTYPE_V16HI_V16HI
:
29312 case V16HI_FTYPE_V8SI_V8SI
:
29313 case V32QI_FTYPE_V32QI_V32QI
:
29314 case V16HI_FTYPE_V32QI_V32QI
:
29315 case V16HI_FTYPE_V16HI_V16HI
:
29316 case V8SI_FTYPE_V4DF_V4DF
:
29317 case V8SI_FTYPE_V8SI_V8SI
:
29318 case V8SI_FTYPE_V16HI_V16HI
:
29319 case V4DI_FTYPE_V4DI_V4DI
:
29320 case V4DI_FTYPE_V8SI_V8SI
:
29321 case V4UDI_FTYPE_V8USI_V8USI
:
29322 if (comparison
== UNKNOWN
)
29323 return ix86_expand_binop_builtin (icode
, exp
, target
);
29326 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
29327 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
29328 gcc_assert (comparison
!= UNKNOWN
);
29332 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
29333 case V16HI_FTYPE_V16HI_SI_COUNT
:
29334 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
29335 case V8SI_FTYPE_V8SI_SI_COUNT
:
29336 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
29337 case V4DI_FTYPE_V4DI_INT_COUNT
:
29338 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
29339 case V8HI_FTYPE_V8HI_SI_COUNT
:
29340 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
29341 case V4SI_FTYPE_V4SI_SI_COUNT
:
29342 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
29343 case V4HI_FTYPE_V4HI_SI_COUNT
:
29344 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
29345 case V2DI_FTYPE_V2DI_SI_COUNT
:
29346 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
29347 case V2SI_FTYPE_V2SI_SI_COUNT
:
29348 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
29349 case V1DI_FTYPE_V1DI_SI_COUNT
:
29351 last_arg_count
= true;
29353 case UINT64_FTYPE_UINT64_UINT64
:
29354 case UINT_FTYPE_UINT_UINT
:
29355 case UINT_FTYPE_UINT_USHORT
:
29356 case UINT_FTYPE_UINT_UCHAR
:
29357 case UINT16_FTYPE_UINT16_INT
:
29358 case UINT8_FTYPE_UINT8_INT
:
29361 case V2DI_FTYPE_V2DI_INT_CONVERT
:
29364 nargs_constant
= 1;
29366 case V4DI_FTYPE_V4DI_INT_CONVERT
:
29369 nargs_constant
= 1;
29371 case V8HI_FTYPE_V8HI_INT
:
29372 case V8HI_FTYPE_V8SF_INT
:
29373 case V8HI_FTYPE_V4SF_INT
:
29374 case V8SF_FTYPE_V8SF_INT
:
29375 case V4SI_FTYPE_V4SI_INT
:
29376 case V4SI_FTYPE_V8SI_INT
:
29377 case V4HI_FTYPE_V4HI_INT
:
29378 case V4DF_FTYPE_V4DF_INT
:
29379 case V4SF_FTYPE_V4SF_INT
:
29380 case V4SF_FTYPE_V8SF_INT
:
29381 case V2DI_FTYPE_V2DI_INT
:
29382 case V2DF_FTYPE_V2DF_INT
:
29383 case V2DF_FTYPE_V4DF_INT
:
29384 case V16HI_FTYPE_V16HI_INT
:
29385 case V8SI_FTYPE_V8SI_INT
:
29386 case V4DI_FTYPE_V4DI_INT
:
29387 case V2DI_FTYPE_V4DI_INT
:
29389 nargs_constant
= 1;
29391 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
29392 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
29393 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
29394 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
29395 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
29396 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
29399 case V32QI_FTYPE_V32QI_V32QI_INT
:
29400 case V16HI_FTYPE_V16HI_V16HI_INT
:
29401 case V16QI_FTYPE_V16QI_V16QI_INT
:
29402 case V4DI_FTYPE_V4DI_V4DI_INT
:
29403 case V8HI_FTYPE_V8HI_V8HI_INT
:
29404 case V8SI_FTYPE_V8SI_V8SI_INT
:
29405 case V8SI_FTYPE_V8SI_V4SI_INT
:
29406 case V8SF_FTYPE_V8SF_V8SF_INT
:
29407 case V8SF_FTYPE_V8SF_V4SF_INT
:
29408 case V4SI_FTYPE_V4SI_V4SI_INT
:
29409 case V4DF_FTYPE_V4DF_V4DF_INT
:
29410 case V4DF_FTYPE_V4DF_V2DF_INT
:
29411 case V4SF_FTYPE_V4SF_V4SF_INT
:
29412 case V2DI_FTYPE_V2DI_V2DI_INT
:
29413 case V4DI_FTYPE_V4DI_V2DI_INT
:
29414 case V2DF_FTYPE_V2DF_V2DF_INT
:
29416 nargs_constant
= 1;
29418 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
29421 nargs_constant
= 1;
29423 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
29426 nargs_constant
= 1;
29428 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
29431 nargs_constant
= 1;
29433 case V2DI_FTYPE_V2DI_UINT_UINT
:
29435 nargs_constant
= 2;
29437 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
29438 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
29439 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
29440 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
29442 nargs_constant
= 1;
29444 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
29446 nargs_constant
= 2;
29449 gcc_unreachable ();
29452 gcc_assert (nargs
<= ARRAY_SIZE (args
));
29454 if (comparison
!= UNKNOWN
)
29456 gcc_assert (nargs
== 2);
29457 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
29460 if (rmode
== VOIDmode
|| rmode
== tmode
)
29464 || GET_MODE (target
) != tmode
29465 || !insn_p
->operand
[0].predicate (target
, tmode
))
29466 target
= gen_reg_rtx (tmode
);
29467 real_target
= target
;
29471 target
= gen_reg_rtx (rmode
);
29472 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
29475 for (i
= 0; i
< nargs
; i
++)
29477 tree arg
= CALL_EXPR_ARG (exp
, i
);
29478 rtx op
= expand_normal (arg
);
29479 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
29480 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
29482 if (last_arg_count
&& (i
+ 1) == nargs
)
29484 /* SIMD shift insns take either an 8-bit immediate or
29485 register as count. But builtin functions take int as
29486 count. If count doesn't match, we put it in register. */
29489 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
29490 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
29491 op
= copy_to_reg (op
);
29494 else if ((nargs
- i
) <= nargs_constant
)
29499 case CODE_FOR_avx2_inserti128
:
29500 case CODE_FOR_avx2_extracti128
:
29501 error ("the last argument must be an 1-bit immediate");
29504 case CODE_FOR_sse4_1_roundsd
:
29505 case CODE_FOR_sse4_1_roundss
:
29507 case CODE_FOR_sse4_1_roundpd
:
29508 case CODE_FOR_sse4_1_roundps
:
29509 case CODE_FOR_avx_roundpd256
:
29510 case CODE_FOR_avx_roundps256
:
29512 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
29513 case CODE_FOR_sse4_1_roundps_sfix
:
29514 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
29515 case CODE_FOR_avx_roundps_sfix256
:
29517 case CODE_FOR_sse4_1_blendps
:
29518 case CODE_FOR_avx_blendpd256
:
29519 case CODE_FOR_avx_vpermilv4df
:
29520 error ("the last argument must be a 4-bit immediate");
29523 case CODE_FOR_sse4_1_blendpd
:
29524 case CODE_FOR_avx_vpermilv2df
:
29525 case CODE_FOR_xop_vpermil2v2df3
:
29526 case CODE_FOR_xop_vpermil2v4sf3
:
29527 case CODE_FOR_xop_vpermil2v4df3
:
29528 case CODE_FOR_xop_vpermil2v8sf3
:
29529 error ("the last argument must be a 2-bit immediate");
29532 case CODE_FOR_avx_vextractf128v4df
:
29533 case CODE_FOR_avx_vextractf128v8sf
:
29534 case CODE_FOR_avx_vextractf128v8si
:
29535 case CODE_FOR_avx_vinsertf128v4df
:
29536 case CODE_FOR_avx_vinsertf128v8sf
:
29537 case CODE_FOR_avx_vinsertf128v8si
:
29538 error ("the last argument must be a 1-bit immediate");
29541 case CODE_FOR_avx_vmcmpv2df3
:
29542 case CODE_FOR_avx_vmcmpv4sf3
:
29543 case CODE_FOR_avx_cmpv2df3
:
29544 case CODE_FOR_avx_cmpv4sf3
:
29545 case CODE_FOR_avx_cmpv4df3
:
29546 case CODE_FOR_avx_cmpv8sf3
:
29547 error ("the last argument must be a 5-bit immediate");
29551 switch (nargs_constant
)
29554 if ((nargs
- i
) == nargs_constant
)
29556 error ("the next to last argument must be an 8-bit immediate");
29560 error ("the last argument must be an 8-bit immediate");
29563 gcc_unreachable ();
29570 if (VECTOR_MODE_P (mode
))
29571 op
= safe_vector_operand (op
, mode
);
29573 /* If we aren't optimizing, only allow one memory operand to
29575 if (memory_operand (op
, mode
))
29578 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
29580 if (optimize
|| !match
|| num_memory
> 1)
29581 op
= copy_to_mode_reg (mode
, op
);
29585 op
= copy_to_reg (op
);
29586 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
29591 args
[i
].mode
= mode
;
29597 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
29600 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
29603 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
29607 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
29608 args
[2].op
, args
[3].op
);
29611 gcc_unreachable ();
29621 /* Subroutine of ix86_expand_builtin to take care of special insns
29622 with variable number of operands. */
29625 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
29626 tree exp
, rtx target
)
29630 unsigned int i
, nargs
, arg_adjust
, memory
;
29634 enum machine_mode mode
;
29636 enum insn_code icode
= d
->icode
;
29637 bool last_arg_constant
= false;
29638 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
29639 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
29640 enum { load
, store
} klass
;
29642 switch ((enum ix86_builtin_func_type
) d
->flag
)
29644 case VOID_FTYPE_VOID
:
29645 if (icode
== CODE_FOR_avx_vzeroupper
)
29646 target
= GEN_INT (vzeroupper_intrinsic
);
29647 emit_insn (GEN_FCN (icode
) (target
));
29649 case VOID_FTYPE_UINT64
:
29650 case VOID_FTYPE_UNSIGNED
:
29656 case INT_FTYPE_VOID
:
29657 case UINT64_FTYPE_VOID
:
29658 case UNSIGNED_FTYPE_VOID
:
29663 case UINT64_FTYPE_PUNSIGNED
:
29664 case V2DI_FTYPE_PV2DI
:
29665 case V4DI_FTYPE_PV4DI
:
29666 case V32QI_FTYPE_PCCHAR
:
29667 case V16QI_FTYPE_PCCHAR
:
29668 case V8SF_FTYPE_PCV4SF
:
29669 case V8SF_FTYPE_PCFLOAT
:
29670 case V4SF_FTYPE_PCFLOAT
:
29671 case V4DF_FTYPE_PCV2DF
:
29672 case V4DF_FTYPE_PCDOUBLE
:
29673 case V2DF_FTYPE_PCDOUBLE
:
29674 case VOID_FTYPE_PVOID
:
29679 case VOID_FTYPE_PV2SF_V4SF
:
29680 case VOID_FTYPE_PV4DI_V4DI
:
29681 case VOID_FTYPE_PV2DI_V2DI
:
29682 case VOID_FTYPE_PCHAR_V32QI
:
29683 case VOID_FTYPE_PCHAR_V16QI
:
29684 case VOID_FTYPE_PFLOAT_V8SF
:
29685 case VOID_FTYPE_PFLOAT_V4SF
:
29686 case VOID_FTYPE_PDOUBLE_V4DF
:
29687 case VOID_FTYPE_PDOUBLE_V2DF
:
29688 case VOID_FTYPE_PLONGLONG_LONGLONG
:
29689 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
29690 case VOID_FTYPE_PINT_INT
:
29693 /* Reserve memory operand for target. */
29694 memory
= ARRAY_SIZE (args
);
29696 case V4SF_FTYPE_V4SF_PCV2SF
:
29697 case V2DF_FTYPE_V2DF_PCDOUBLE
:
29702 case V8SF_FTYPE_PCV8SF_V8SI
:
29703 case V4DF_FTYPE_PCV4DF_V4DI
:
29704 case V4SF_FTYPE_PCV4SF_V4SI
:
29705 case V2DF_FTYPE_PCV2DF_V2DI
:
29706 case V8SI_FTYPE_PCV8SI_V8SI
:
29707 case V4DI_FTYPE_PCV4DI_V4DI
:
29708 case V4SI_FTYPE_PCV4SI_V4SI
:
29709 case V2DI_FTYPE_PCV2DI_V2DI
:
29714 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
29715 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
29716 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
29717 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
29718 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
29719 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
29720 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
29721 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
29724 /* Reserve memory operand for target. */
29725 memory
= ARRAY_SIZE (args
);
29727 case VOID_FTYPE_UINT_UINT_UINT
:
29728 case VOID_FTYPE_UINT64_UINT_UINT
:
29729 case UCHAR_FTYPE_UINT_UINT_UINT
:
29730 case UCHAR_FTYPE_UINT64_UINT_UINT
:
29733 memory
= ARRAY_SIZE (args
);
29734 last_arg_constant
= true;
29737 gcc_unreachable ();
29740 gcc_assert (nargs
<= ARRAY_SIZE (args
));
29742 if (klass
== store
)
29744 arg
= CALL_EXPR_ARG (exp
, 0);
29745 op
= expand_normal (arg
);
29746 gcc_assert (target
== 0);
29749 if (GET_MODE (op
) != Pmode
)
29750 op
= convert_to_mode (Pmode
, op
, 1);
29751 target
= gen_rtx_MEM (tmode
, force_reg (Pmode
, op
));
29754 target
= force_reg (tmode
, op
);
29762 || !register_operand (target
, tmode
)
29763 || GET_MODE (target
) != tmode
)
29764 target
= gen_reg_rtx (tmode
);
29767 for (i
= 0; i
< nargs
; i
++)
29769 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
29772 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
29773 op
= expand_normal (arg
);
29774 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
29776 if (last_arg_constant
&& (i
+ 1) == nargs
)
29780 if (icode
== CODE_FOR_lwp_lwpvalsi3
29781 || icode
== CODE_FOR_lwp_lwpinssi3
29782 || icode
== CODE_FOR_lwp_lwpvaldi3
29783 || icode
== CODE_FOR_lwp_lwpinsdi3
)
29784 error ("the last argument must be a 32-bit immediate");
29786 error ("the last argument must be an 8-bit immediate");
29794 /* This must be the memory operand. */
29795 if (GET_MODE (op
) != Pmode
)
29796 op
= convert_to_mode (Pmode
, op
, 1);
29797 op
= gen_rtx_MEM (mode
, force_reg (Pmode
, op
));
29798 gcc_assert (GET_MODE (op
) == mode
29799 || GET_MODE (op
) == VOIDmode
);
	  /* This must be a register.  */
29804 if (VECTOR_MODE_P (mode
))
29805 op
= safe_vector_operand (op
, mode
);
29807 gcc_assert (GET_MODE (op
) == mode
29808 || GET_MODE (op
) == VOIDmode
);
29809 op
= copy_to_mode_reg (mode
, op
);
29814 args
[i
].mode
= mode
;
29820 pat
= GEN_FCN (icode
) (target
);
29823 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
29826 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
29829 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
29832 gcc_unreachable ();
29838 return klass
== store
? 0 : target
;
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static int
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
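/* Illustrative aside (a standalone sketch, not part of this file): the same
   range check with a hypothetical "nunits" standing in for
   TYPE_VECTOR_SUBPARTS (vec_type).  It shows why a V4SF selector must lie
   in 0..3 and why the helper falls back to 0 on error.  */

#include <stdio.h>

static int
check_selector (unsigned long long elt, unsigned int nunits)
{
  unsigned long long max = nunits - 1;

  if (elt > max)
    {
      /* Mirrors the diagnostic issued by get_element_number above.  */
      fprintf (stderr,
               "selector must be an integer constant in the range 0..%llu\n",
               max);
      return 0;
    }
  return (int) elt;
}

int
main (void)
{
  printf ("%d\n", check_selector (2, 4));   /* valid: prints 2 */
  printf ("%d\n", check_selector (7, 4));   /* out of range: prints 0 */
  return 0;
}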
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
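/* Illustrative aside (its own translation unit, not part of this file):
   user-level code that reaches ix86_expand_vec_init_builtin.  In this era
   of GCC, mmintrin.h implements _mm_set_pi32 on top of
   __builtin_ia32_vec_init_v2si (IX86_BUILTIN_VEC_INIT_V2SI); compile with
   -mmmx.  */

#include <mmintrin.h>

__m64
make_pair (int hi, int lo)
{
  /* Expands through the vec_init builtin rather than a vec_init pattern
     in mmx.md, for the reasons given in the comment above.  */
  return _mm_set_pi32 (hi, lo);
}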
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  rtx op0;
  unsigned HOST_WIDE_INT elt;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  unsigned HOST_WIDE_INT elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
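/* Illustrative aside (its own translation unit, not part of this file):
   user-level code that reaches the two expanders above.  In this era of
   GCC, emmintrin.h implements _mm_insert_epi16 / _mm_extract_epi16 on top
   of __builtin_ia32_vec_set_v8hi / __builtin_ia32_vec_ext_v8hi; compile
   with -msse2.  A non-constant or out-of-range lane number trips the
   get_element_number diagnostic above.  */

#include <emmintrin.h>

int
replace_lane2 (__m128i v, short x)
{
  __m128i w = _mm_insert_epi16 (v, x, 2);   /* vec_set: write lane 2 */
  return _mm_extract_epi16 (w, 2);          /* vec_ext: read it back */
}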
29963 /* Expand an expression EXP that calls a built-in function,
29964 with result going to TARGET if that's convenient
29965 (and in mode MODE if that's convenient).
29966 SUBTARGET may be used as the target for computing one of EXP's operands.
29967 IGNORE is nonzero if the value is to be ignored. */
29970 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
29971 enum machine_mode mode ATTRIBUTE_UNUSED
,
29972 int ignore ATTRIBUTE_UNUSED
)
29974 const struct builtin_description
*d
;
29976 enum insn_code icode
;
29977 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
29978 tree arg0
, arg1
, arg2
, arg3
, arg4
;
29979 rtx op0
, op1
, op2
, op3
, op4
, pat
;
29980 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
29981 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
29983 /* For CPU builtins that can be folded, fold first and expand the fold. */
29986 case IX86_BUILTIN_CPU_INIT
:
29988 /* Make it call __cpu_indicator_init in libgcc. */
29989 tree call_expr
, fndecl
, type
;
29990 type
= build_function_type_list (integer_type_node
, NULL_TREE
);
29991 fndecl
= build_fn_decl ("__cpu_indicator_init", type
);
29992 call_expr
= build_call_expr (fndecl
, 0);
29993 return expand_expr (call_expr
, target
, mode
, EXPAND_NORMAL
);
29995 case IX86_BUILTIN_CPU_IS
:
29996 case IX86_BUILTIN_CPU_SUPPORTS
:
29998 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29999 tree fold_expr
= fold_builtin_cpu (fndecl
, &arg0
);
30000 gcc_assert (fold_expr
!= NULL_TREE
);
30001 return expand_expr (fold_expr
, target
, mode
, EXPAND_NORMAL
);
  /* Determine whether the builtin function is available under the current
     ISA.  Originally the builtin was not created if it wasn't applicable to
     the current ISA based on the command line switches.  With function
     specific options, we need to check in the context of the function making
     the call whether it is supported.  */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
				       NULL, (enum fpmath_unit) 0, false);

      if (!opts)
	error ("%qE needs unknown isa option", fndecl);
      else
	{
	  gcc_assert (opts != NULL);
	  error ("%qE needs isa option %s", fndecl, opts);
	  free (opts);
	}
      return const0_rtx;
    }
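/* Illustrative aside (its own translation unit, not part of this file):
   the situation the check above handles.  The builtin is always created,
   so availability must be decided per calling function; the exact
   diagnostic wording shown is approximate.  */

__attribute__((target ("sse4.2")))
unsigned int
crc_ok (unsigned int c, unsigned int v)
{
  return __builtin_ia32_crc32si (c, v);   /* OK: SSE4.2 enabled here */
}

unsigned int
crc_bad (unsigned int c, unsigned int v)
{
  /* Compiled without -msse4.2 this reaches the error above, roughly:
     "'__builtin_ia32_crc32si' needs isa option -msse4.2".  */
  return __builtin_ia32_crc32si (c, v);
}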
30029 case IX86_BUILTIN_MASKMOVQ
:
30030 case IX86_BUILTIN_MASKMOVDQU
:
30031 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
30032 ? CODE_FOR_mmx_maskmovq
30033 : CODE_FOR_sse2_maskmovdqu
);
30034 /* Note the arg order is different from the operand order. */
30035 arg1
= CALL_EXPR_ARG (exp
, 0);
30036 arg2
= CALL_EXPR_ARG (exp
, 1);
30037 arg0
= CALL_EXPR_ARG (exp
, 2);
30038 op0
= expand_normal (arg0
);
30039 op1
= expand_normal (arg1
);
30040 op2
= expand_normal (arg2
);
30041 mode0
= insn_data
[icode
].operand
[0].mode
;
30042 mode1
= insn_data
[icode
].operand
[1].mode
;
30043 mode2
= insn_data
[icode
].operand
[2].mode
;
30045 if (GET_MODE (op0
) != Pmode
)
30046 op0
= convert_to_mode (Pmode
, op0
, 1);
30047 op0
= gen_rtx_MEM (mode1
, force_reg (Pmode
, op0
));
30049 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
30050 op0
= copy_to_mode_reg (mode0
, op0
);
30051 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
30052 op1
= copy_to_mode_reg (mode1
, op1
);
30053 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
30054 op2
= copy_to_mode_reg (mode2
, op2
);
30055 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
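/* Illustrative aside (its own translation unit, not part of this file):
   _mm_getcsr and _mm_setcsr in xmmintrin.h are thin wrappers around
   __builtin_ia32_stmxcsr / __builtin_ia32_ldmxcsr, i.e. the two cases just
   above; compile with -msse.  Bit 15 of MXCSR is flush-to-zero.  */

#include <xmmintrin.h>

void
enable_flush_to_zero (void)
{
  unsigned int csr = _mm_getcsr ();   /* IX86_BUILTIN_STMXCSR */
  _mm_setcsr (csr | 0x8000);          /* IX86_BUILTIN_LDMXCSR */
}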
30073 case IX86_BUILTIN_CLFLUSH
:
30074 arg0
= CALL_EXPR_ARG (exp
, 0);
30075 op0
= expand_normal (arg0
);
30076 icode
= CODE_FOR_sse2_clflush
;
30077 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
30079 if (GET_MODE (op0
) != Pmode
)
30080 op0
= convert_to_mode (Pmode
, op0
, 1);
30081 op0
= force_reg (Pmode
, op0
);
30084 emit_insn (gen_sse2_clflush (op0
));
30087 case IX86_BUILTIN_MONITOR
:
30088 arg0
= CALL_EXPR_ARG (exp
, 0);
30089 arg1
= CALL_EXPR_ARG (exp
, 1);
30090 arg2
= CALL_EXPR_ARG (exp
, 2);
30091 op0
= expand_normal (arg0
);
30092 op1
= expand_normal (arg1
);
30093 op2
= expand_normal (arg2
);
30096 if (GET_MODE (op0
) != Pmode
)
30097 op0
= convert_to_mode (Pmode
, op0
, 1);
30098 op0
= force_reg (Pmode
, op0
);
30101 op1
= copy_to_mode_reg (SImode
, op1
);
30103 op2
= copy_to_mode_reg (SImode
, op2
);
30104 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
30107 case IX86_BUILTIN_MWAIT
:
30108 arg0
= CALL_EXPR_ARG (exp
, 0);
30109 arg1
= CALL_EXPR_ARG (exp
, 1);
30110 op0
= expand_normal (arg0
);
30111 op1
= expand_normal (arg1
);
30113 op0
= copy_to_mode_reg (SImode
, op0
);
30115 op1
= copy_to_mode_reg (SImode
, op1
);
30116 emit_insn (gen_sse3_mwait (op0
, op1
));
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);
30142 case IX86_BUILTIN_INFQ
:
30143 case IX86_BUILTIN_HUGE_VALQ
:
30145 REAL_VALUE_TYPE inf
;
30149 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
30151 tmp
= validize_mem (force_const_mem (mode
, tmp
));
30154 target
= gen_reg_rtx (mode
);
30156 emit_move_insn (target
, tmp
);
30160 case IX86_BUILTIN_LLWPCB
:
30161 arg0
= CALL_EXPR_ARG (exp
, 0);
30162 op0
= expand_normal (arg0
);
30163 icode
= CODE_FOR_lwp_llwpcb
;
30164 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
30166 if (GET_MODE (op0
) != Pmode
)
30167 op0
= convert_to_mode (Pmode
, op0
, 1);
30168 op0
= force_reg (Pmode
, op0
);
30170 emit_insn (gen_lwp_llwpcb (op0
));
30173 case IX86_BUILTIN_SLWPCB
:
30174 icode
= CODE_FOR_lwp_slwpcb
;
30176 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
30177 target
= gen_reg_rtx (Pmode
);
30178 emit_insn (gen_lwp_slwpcb (target
));
    case IX86_BUILTIN_BEXTRI32:
    case IX86_BUILTIN_BEXTRI64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      icode = (fcode == IX86_BUILTIN_BEXTRI32
	       ? CODE_FOR_tbm_bextri_si
	       : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
	{
	  error ("last argument must be an immediate");
	  return const0_rtx;
	}
      else
	{
	  unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
	  unsigned char lsb_index = INTVAL (op1) & 0xFF;
	  op1 = GEN_INT (length);
	  op2 = GEN_INT (lsb_index);
	  pat = GEN_FCN (icode) (target, op0, op1, op2);
	  if (pat)
	    emit_insn (pat);
	  return target;
	}
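/* Illustrative aside (its own translation unit, not part of this file):
   the TBM intrinsic __bextri_u32 from x86intrin.h feeds this case; its
   control immediate packs the field length into bits 15:8 and the start
   (LSB) position into bits 7:0, which is exactly what the length /
   lsb_index unpacking above undoes.  Compile with -mtbm.  */

#include <x86intrin.h>

unsigned int
extract_second_byte (unsigned int x)
{
  /* length = 8, start = 8: returns bits 15:8 of x in bits 7:0.  */
  return __bextri_u32 (x, (8 << 8) | 8);
}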
    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

    rdrand_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
	{
	  op1 = convert_memory_address (Pmode, op1);
	  op1 = copy_addr_to_reg (op1);
	}
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op1 = gen_reg_rtx (SImode);
      emit_move_insn (op1, CONST1_RTX (SImode));

      /* Emit SImode conditional move.  */
      if (mode0 == HImode)
	{
	  op2 = gen_reg_rtx (SImode);
	  emit_insn (gen_zero_extendhisi2 (op2, op0));
	}
      else if (mode0 == SImode)
	op2 = op0;
      else
	op2 = gen_rtx_SUBREG (SImode, op0, 0);

      target = gen_reg_rtx (SImode);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
			 const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;
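/* Illustrative aside (its own translation unit, not part of this file):
   _rdrand32_step from immintrin.h maps to IX86_BUILTIN_RDRAND32_STEP.
   The expansion above stores the hardware value through the pointer and
   turns the carry flag into the int return value (1 = success).
   Compile with -mrdrnd.  */

#include <immintrin.h>

int
get_random_u32 (unsigned int *out)
{
  int tries = 10;

  while (tries--)
    if (_rdrand32_step (out))
      return 1;   /* carry was set: *out holds a fresh value */
  return 0;       /* hardware kept reporting failure */
}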
30257 case IX86_BUILTIN_GATHERSIV2DF
:
30258 icode
= CODE_FOR_avx2_gathersiv2df
;
30260 case IX86_BUILTIN_GATHERSIV4DF
:
30261 icode
= CODE_FOR_avx2_gathersiv4df
;
30263 case IX86_BUILTIN_GATHERDIV2DF
:
30264 icode
= CODE_FOR_avx2_gatherdiv2df
;
30266 case IX86_BUILTIN_GATHERDIV4DF
:
30267 icode
= CODE_FOR_avx2_gatherdiv4df
;
30269 case IX86_BUILTIN_GATHERSIV4SF
:
30270 icode
= CODE_FOR_avx2_gathersiv4sf
;
30272 case IX86_BUILTIN_GATHERSIV8SF
:
30273 icode
= CODE_FOR_avx2_gathersiv8sf
;
30275 case IX86_BUILTIN_GATHERDIV4SF
:
30276 icode
= CODE_FOR_avx2_gatherdiv4sf
;
30278 case IX86_BUILTIN_GATHERDIV8SF
:
30279 icode
= CODE_FOR_avx2_gatherdiv8sf
;
30281 case IX86_BUILTIN_GATHERSIV2DI
:
30282 icode
= CODE_FOR_avx2_gathersiv2di
;
30284 case IX86_BUILTIN_GATHERSIV4DI
:
30285 icode
= CODE_FOR_avx2_gathersiv4di
;
30287 case IX86_BUILTIN_GATHERDIV2DI
:
30288 icode
= CODE_FOR_avx2_gatherdiv2di
;
30290 case IX86_BUILTIN_GATHERDIV4DI
:
30291 icode
= CODE_FOR_avx2_gatherdiv4di
;
30293 case IX86_BUILTIN_GATHERSIV4SI
:
30294 icode
= CODE_FOR_avx2_gathersiv4si
;
30296 case IX86_BUILTIN_GATHERSIV8SI
:
30297 icode
= CODE_FOR_avx2_gathersiv8si
;
30299 case IX86_BUILTIN_GATHERDIV4SI
:
30300 icode
= CODE_FOR_avx2_gatherdiv4si
;
30302 case IX86_BUILTIN_GATHERDIV8SI
:
30303 icode
= CODE_FOR_avx2_gatherdiv8si
;
30305 case IX86_BUILTIN_GATHERALTSIV4DF
:
30306 icode
= CODE_FOR_avx2_gathersiv4df
;
30308 case IX86_BUILTIN_GATHERALTDIV8SF
:
30309 icode
= CODE_FOR_avx2_gatherdiv8sf
;
30311 case IX86_BUILTIN_GATHERALTSIV4DI
:
30312 icode
= CODE_FOR_avx2_gathersiv4di
;
30314 case IX86_BUILTIN_GATHERALTDIV8SI
:
30315 icode
= CODE_FOR_avx2_gatherdiv8si
;
30319 arg0
= CALL_EXPR_ARG (exp
, 0);
30320 arg1
= CALL_EXPR_ARG (exp
, 1);
30321 arg2
= CALL_EXPR_ARG (exp
, 2);
30322 arg3
= CALL_EXPR_ARG (exp
, 3);
30323 arg4
= CALL_EXPR_ARG (exp
, 4);
30324 op0
= expand_normal (arg0
);
30325 op1
= expand_normal (arg1
);
30326 op2
= expand_normal (arg2
);
30327 op3
= expand_normal (arg3
);
30328 op4
= expand_normal (arg4
);
30329 /* Note the arg order is different from the operand order. */
30330 mode0
= insn_data
[icode
].operand
[1].mode
;
30331 mode2
= insn_data
[icode
].operand
[3].mode
;
30332 mode3
= insn_data
[icode
].operand
[4].mode
;
30333 mode4
= insn_data
[icode
].operand
[5].mode
;
30335 if (target
== NULL_RTX
30336 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
30337 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
30339 subtarget
= target
;
30341 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
30342 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
30344 rtx half
= gen_reg_rtx (V4SImode
);
30345 if (!nonimmediate_operand (op2
, V8SImode
))
30346 op2
= copy_to_mode_reg (V8SImode
, op2
);
30347 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
30350 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
30351 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
30353 rtx (*gen
) (rtx
, rtx
);
30354 rtx half
= gen_reg_rtx (mode0
);
30355 if (mode0
== V4SFmode
)
30356 gen
= gen_vec_extract_lo_v8sf
;
30358 gen
= gen_vec_extract_lo_v8si
;
30359 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
30360 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
30361 emit_insn (gen (half
, op0
));
30363 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
30364 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
30365 emit_insn (gen (half
, op3
));
      /* Force memory operand only with base register here.  But we
	 don't want to do it on memory operand for other builtin
	 functions.  */
30372 if (GET_MODE (op1
) != Pmode
)
30373 op1
= convert_to_mode (Pmode
, op1
, 1);
30374 op1
= force_reg (Pmode
, op1
);
30376 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30377 op0
= copy_to_mode_reg (mode0
, op0
);
30378 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
30379 op1
= copy_to_mode_reg (Pmode
, op1
);
30380 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
30381 op2
= copy_to_mode_reg (mode2
, op2
);
30382 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
30383 op3
= copy_to_mode_reg (mode3
, op3
);
30384 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
30386 error ("last argument must be scale 1, 2, 4, 8");
30390 /* Optimize. If mask is known to have all high bits set,
30391 replace op0 with pc_rtx to signal that the instruction
30392 overwrites the whole destination and doesn't use its
30393 previous contents. */
30396 if (TREE_CODE (arg3
) == VECTOR_CST
)
30398 unsigned int negative
= 0;
30399 for (i
= 0; i
< VECTOR_CST_NELTS (arg3
); ++i
)
30401 tree cst
= VECTOR_CST_ELT (arg3
, i
);
30402 if (TREE_CODE (cst
) == INTEGER_CST
30403 && tree_int_cst_sign_bit (cst
))
30405 else if (TREE_CODE (cst
) == REAL_CST
30406 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
30409 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
30412 else if (TREE_CODE (arg3
) == SSA_NAME
)
	  /* Recognize also when mask is like:
	     __v2df src = _mm_setzero_pd ();
	     __v2df mask = _mm_cmpeq_pd (src, src);
	     or
	     __v8sf src = _mm256_setzero_ps ();
	     __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
	     as that is a cheaper way to load all ones into
	     a register than having to load a constant from
	     memory.  */
30423 gimple def_stmt
= SSA_NAME_DEF_STMT (arg3
);
30424 if (is_gimple_call (def_stmt
))
30426 tree fndecl
= gimple_call_fndecl (def_stmt
);
30428 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
30429 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
30431 case IX86_BUILTIN_CMPPD
:
30432 case IX86_BUILTIN_CMPPS
:
30433 case IX86_BUILTIN_CMPPD256
:
30434 case IX86_BUILTIN_CMPPS256
:
30435 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
30438 case IX86_BUILTIN_CMPEQPD
:
30439 case IX86_BUILTIN_CMPEQPS
:
30440 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
30441 && initializer_zerop (gimple_call_arg (def_stmt
,
30452 pat
= GEN_FCN (icode
) (subtarget
, op0
, op1
, op2
, op3
, op4
);
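/* Illustrative aside (its own translation unit, not part of this file):
   the compare-with-self idiom recognized above.  Because the mask is
   provably all ones, the expander can pass pc_rtx as the source operand,
   i.e. tell the gather pattern that the destination is fully overwritten.
   Compile with -mavx2.  */

#include <immintrin.h>

__m256
gather8 (const float *base, __m256i idx)
{
  __m256 src  = _mm256_setzero_ps ();
  __m256 mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);   /* all lanes true */

  return _mm256_mask_i32gather_ps (src, base, idx, mask, 4);
}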
30457 if (fcode
== IX86_BUILTIN_GATHERDIV8SF
30458 || fcode
== IX86_BUILTIN_GATHERDIV8SI
)
30460 enum machine_mode tmode
= GET_MODE (subtarget
) == V8SFmode
30461 ? V4SFmode
: V4SImode
;
30462 if (target
== NULL_RTX
)
30463 target
= gen_reg_rtx (tmode
);
30464 if (tmode
== V4SFmode
)
30465 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
30467 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
30470 target
= subtarget
;
30474 case IX86_BUILTIN_XABORT
:
30475 icode
= CODE_FOR_xabort
;
30476 arg0
= CALL_EXPR_ARG (exp
, 0);
30477 op0
= expand_normal (arg0
);
30478 mode0
= insn_data
[icode
].operand
[0].mode
;
30479 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
30481 error ("the xabort's argument must be an 8-bit immediate");
30484 emit_insn (gen_xabort (op0
));
30491 for (i
= 0, d
= bdesc_special_args
;
30492 i
< ARRAY_SIZE (bdesc_special_args
);
30494 if (d
->code
== fcode
)
30495 return ix86_expand_special_args_builtin (d
, exp
, target
);
30497 for (i
= 0, d
= bdesc_args
;
30498 i
< ARRAY_SIZE (bdesc_args
);
30500 if (d
->code
== fcode
)
30503 case IX86_BUILTIN_FABSQ
:
30504 case IX86_BUILTIN_COPYSIGNQ
:
30506 /* Emit a normal call if SSE isn't available. */
30507 return expand_call (exp
, target
, ignore
);
30509 return ix86_expand_args_builtin (d
, exp
, target
);
30512 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
30513 if (d
->code
== fcode
)
30514 return ix86_expand_sse_comi (d
, exp
, target
);
30516 for (i
= 0, d
= bdesc_pcmpestr
;
30517 i
< ARRAY_SIZE (bdesc_pcmpestr
);
30519 if (d
->code
== fcode
)
30520 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
30522 for (i
= 0, d
= bdesc_pcmpistr
;
30523 i
< ARRAY_SIZE (bdesc_pcmpistr
);
30525 if (d
->code
== fcode
)
30526 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
30528 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
30529 if (d
->code
== fcode
)
30530 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
30531 (enum ix86_builtin_func_type
)
30532 d
->flag
, d
->comparison
);
30534 gcc_unreachable ();
30537 /* Returns a function decl for a vectorized version of the builtin function
30538 with builtin function code FN and the result vector type TYPE, or NULL_TREE
30539 if it is not available. */
30542 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
30545 enum machine_mode in_mode
, out_mode
;
30547 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
30549 if (TREE_CODE (type_out
) != VECTOR_TYPE
30550 || TREE_CODE (type_in
) != VECTOR_TYPE
30551 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
30554 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
30555 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
30556 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
30557 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
30561 case BUILT_IN_SQRT
:
30562 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30564 if (out_n
== 2 && in_n
== 2)
30565 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
30566 else if (out_n
== 4 && in_n
== 4)
30567 return ix86_builtins
[IX86_BUILTIN_SQRTPD256
];
30571 case BUILT_IN_SQRTF
:
30572 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30574 if (out_n
== 4 && in_n
== 4)
30575 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
30576 else if (out_n
== 8 && in_n
== 8)
30577 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR256
];
30581 case BUILT_IN_IFLOOR
:
30582 case BUILT_IN_LFLOOR
:
30583 case BUILT_IN_LLFLOOR
:
30584 /* The round insn does not trap on denormals. */
30585 if (flag_trapping_math
|| !TARGET_ROUND
)
30588 if (out_mode
== SImode
&& in_mode
== DFmode
)
30590 if (out_n
== 4 && in_n
== 2)
30591 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
];
30592 else if (out_n
== 8 && in_n
== 4)
30593 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
];
30597 case BUILT_IN_IFLOORF
:
30598 case BUILT_IN_LFLOORF
:
30599 case BUILT_IN_LLFLOORF
:
30600 /* The round insn does not trap on denormals. */
30601 if (flag_trapping_math
|| !TARGET_ROUND
)
30604 if (out_mode
== SImode
&& in_mode
== SFmode
)
30606 if (out_n
== 4 && in_n
== 4)
30607 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX
];
30608 else if (out_n
== 8 && in_n
== 8)
30609 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX256
];
30613 case BUILT_IN_ICEIL
:
30614 case BUILT_IN_LCEIL
:
30615 case BUILT_IN_LLCEIL
:
30616 /* The round insn does not trap on denormals. */
30617 if (flag_trapping_math
|| !TARGET_ROUND
)
30620 if (out_mode
== SImode
&& in_mode
== DFmode
)
30622 if (out_n
== 4 && in_n
== 2)
30623 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
];
30624 else if (out_n
== 8 && in_n
== 4)
30625 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
];
30629 case BUILT_IN_ICEILF
:
30630 case BUILT_IN_LCEILF
:
30631 case BUILT_IN_LLCEILF
:
30632 /* The round insn does not trap on denormals. */
30633 if (flag_trapping_math
|| !TARGET_ROUND
)
30636 if (out_mode
== SImode
&& in_mode
== SFmode
)
30638 if (out_n
== 4 && in_n
== 4)
30639 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX
];
30640 else if (out_n
== 8 && in_n
== 8)
30641 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX256
];
30645 case BUILT_IN_IRINT
:
30646 case BUILT_IN_LRINT
:
30647 case BUILT_IN_LLRINT
:
30648 if (out_mode
== SImode
&& in_mode
== DFmode
)
30650 if (out_n
== 4 && in_n
== 2)
30651 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
30652 else if (out_n
== 8 && in_n
== 4)
30653 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX256
];
30657 case BUILT_IN_IRINTF
:
30658 case BUILT_IN_LRINTF
:
30659 case BUILT_IN_LLRINTF
:
30660 if (out_mode
== SImode
&& in_mode
== SFmode
)
30662 if (out_n
== 4 && in_n
== 4)
30663 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
30664 else if (out_n
== 8 && in_n
== 8)
30665 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ256
];
30669 case BUILT_IN_IROUND
:
30670 case BUILT_IN_LROUND
:
30671 case BUILT_IN_LLROUND
:
30672 /* The round insn does not trap on denormals. */
30673 if (flag_trapping_math
|| !TARGET_ROUND
)
30676 if (out_mode
== SImode
&& in_mode
== DFmode
)
30678 if (out_n
== 4 && in_n
== 2)
30679 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
];
30680 else if (out_n
== 8 && in_n
== 4)
30681 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
];
30685 case BUILT_IN_IROUNDF
:
30686 case BUILT_IN_LROUNDF
:
30687 case BUILT_IN_LLROUNDF
:
30688 /* The round insn does not trap on denormals. */
30689 if (flag_trapping_math
|| !TARGET_ROUND
)
30692 if (out_mode
== SImode
&& in_mode
== SFmode
)
30694 if (out_n
== 4 && in_n
== 4)
30695 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX
];
30696 else if (out_n
== 8 && in_n
== 8)
30697 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX256
];
30701 case BUILT_IN_COPYSIGN
:
30702 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30704 if (out_n
== 2 && in_n
== 2)
30705 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD
];
30706 else if (out_n
== 4 && in_n
== 4)
30707 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD256
];
30711 case BUILT_IN_COPYSIGNF
:
30712 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30714 if (out_n
== 4 && in_n
== 4)
30715 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS
];
30716 else if (out_n
== 8 && in_n
== 8)
30717 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS256
];
30721 case BUILT_IN_FLOOR
:
30722 /* The round insn does not trap on denormals. */
30723 if (flag_trapping_math
|| !TARGET_ROUND
)
30726 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30728 if (out_n
== 2 && in_n
== 2)
30729 return ix86_builtins
[IX86_BUILTIN_FLOORPD
];
30730 else if (out_n
== 4 && in_n
== 4)
30731 return ix86_builtins
[IX86_BUILTIN_FLOORPD256
];
30735 case BUILT_IN_FLOORF
:
30736 /* The round insn does not trap on denormals. */
30737 if (flag_trapping_math
|| !TARGET_ROUND
)
30740 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30742 if (out_n
== 4 && in_n
== 4)
30743 return ix86_builtins
[IX86_BUILTIN_FLOORPS
];
30744 else if (out_n
== 8 && in_n
== 8)
30745 return ix86_builtins
[IX86_BUILTIN_FLOORPS256
];
30749 case BUILT_IN_CEIL
:
30750 /* The round insn does not trap on denormals. */
30751 if (flag_trapping_math
|| !TARGET_ROUND
)
30754 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30756 if (out_n
== 2 && in_n
== 2)
30757 return ix86_builtins
[IX86_BUILTIN_CEILPD
];
30758 else if (out_n
== 4 && in_n
== 4)
30759 return ix86_builtins
[IX86_BUILTIN_CEILPD256
];
30763 case BUILT_IN_CEILF
:
30764 /* The round insn does not trap on denormals. */
30765 if (flag_trapping_math
|| !TARGET_ROUND
)
30768 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30770 if (out_n
== 4 && in_n
== 4)
30771 return ix86_builtins
[IX86_BUILTIN_CEILPS
];
30772 else if (out_n
== 8 && in_n
== 8)
30773 return ix86_builtins
[IX86_BUILTIN_CEILPS256
];
30777 case BUILT_IN_TRUNC
:
30778 /* The round insn does not trap on denormals. */
30779 if (flag_trapping_math
|| !TARGET_ROUND
)
30782 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30784 if (out_n
== 2 && in_n
== 2)
30785 return ix86_builtins
[IX86_BUILTIN_TRUNCPD
];
30786 else if (out_n
== 4 && in_n
== 4)
30787 return ix86_builtins
[IX86_BUILTIN_TRUNCPD256
];
30791 case BUILT_IN_TRUNCF
:
30792 /* The round insn does not trap on denormals. */
30793 if (flag_trapping_math
|| !TARGET_ROUND
)
30796 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30798 if (out_n
== 4 && in_n
== 4)
30799 return ix86_builtins
[IX86_BUILTIN_TRUNCPS
];
30800 else if (out_n
== 8 && in_n
== 8)
30801 return ix86_builtins
[IX86_BUILTIN_TRUNCPS256
];
30805 case BUILT_IN_RINT
:
30806 /* The round insn does not trap on denormals. */
30807 if (flag_trapping_math
|| !TARGET_ROUND
)
30810 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30812 if (out_n
== 2 && in_n
== 2)
30813 return ix86_builtins
[IX86_BUILTIN_RINTPD
];
30814 else if (out_n
== 4 && in_n
== 4)
30815 return ix86_builtins
[IX86_BUILTIN_RINTPD256
];
30819 case BUILT_IN_RINTF
:
30820 /* The round insn does not trap on denormals. */
30821 if (flag_trapping_math
|| !TARGET_ROUND
)
30824 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30826 if (out_n
== 4 && in_n
== 4)
30827 return ix86_builtins
[IX86_BUILTIN_RINTPS
];
30828 else if (out_n
== 8 && in_n
== 8)
30829 return ix86_builtins
[IX86_BUILTIN_RINTPS256
];
30833 case BUILT_IN_ROUND
:
30834 /* The round insn does not trap on denormals. */
30835 if (flag_trapping_math
|| !TARGET_ROUND
)
30838 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30840 if (out_n
== 2 && in_n
== 2)
30841 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ
];
30842 else if (out_n
== 4 && in_n
== 4)
30843 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ256
];
30847 case BUILT_IN_ROUNDF
:
30848 /* The round insn does not trap on denormals. */
30849 if (flag_trapping_math
|| !TARGET_ROUND
)
30852 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30854 if (out_n
== 4 && in_n
== 4)
30855 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ
];
30856 else if (out_n
== 8 && in_n
== 8)
30857 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ256
];
30862 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30864 if (out_n
== 2 && in_n
== 2)
30865 return ix86_builtins
[IX86_BUILTIN_VFMADDPD
];
30866 if (out_n
== 4 && in_n
== 4)
30867 return ix86_builtins
[IX86_BUILTIN_VFMADDPD256
];
30871 case BUILT_IN_FMAF
:
30872 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30874 if (out_n
== 4 && in_n
== 4)
30875 return ix86_builtins
[IX86_BUILTIN_VFMADDPS
];
30876 if (out_n
== 8 && in_n
== 8)
30877 return ix86_builtins
[IX86_BUILTIN_VFMADDPS256
];
30885 /* Dispatch to a handler for a vectorization library. */
30886 if (ix86_veclib_handler
)
30887 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
30893 /* Handler for an SVML-style interface to
30894 a library with vectorized intrinsics. */
30897 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
30900 tree fntype
, new_fndecl
, args
;
30903 enum machine_mode el_mode
, in_mode
;
30906 /* The SVML is suitable for unsafe math only. */
30907 if (!flag_unsafe_math_optimizations
)
30910 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
30911 n
= TYPE_VECTOR_SUBPARTS (type_out
);
30912 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
30913 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
30914 if (el_mode
!= in_mode
30922 case BUILT_IN_LOG10
:
30924 case BUILT_IN_TANH
:
30926 case BUILT_IN_ATAN
:
30927 case BUILT_IN_ATAN2
:
30928 case BUILT_IN_ATANH
:
30929 case BUILT_IN_CBRT
:
30930 case BUILT_IN_SINH
:
30932 case BUILT_IN_ASINH
:
30933 case BUILT_IN_ASIN
:
30934 case BUILT_IN_COSH
:
30936 case BUILT_IN_ACOSH
:
30937 case BUILT_IN_ACOS
:
30938 if (el_mode
!= DFmode
|| n
!= 2)
30942 case BUILT_IN_EXPF
:
30943 case BUILT_IN_LOGF
:
30944 case BUILT_IN_LOG10F
:
30945 case BUILT_IN_POWF
:
30946 case BUILT_IN_TANHF
:
30947 case BUILT_IN_TANF
:
30948 case BUILT_IN_ATANF
:
30949 case BUILT_IN_ATAN2F
:
30950 case BUILT_IN_ATANHF
:
30951 case BUILT_IN_CBRTF
:
30952 case BUILT_IN_SINHF
:
30953 case BUILT_IN_SINF
:
30954 case BUILT_IN_ASINHF
:
30955 case BUILT_IN_ASINF
:
30956 case BUILT_IN_COSHF
:
30957 case BUILT_IN_COSF
:
30958 case BUILT_IN_ACOSHF
:
30959 case BUILT_IN_ACOSF
:
30960 if (el_mode
!= SFmode
|| n
!= 4)
30968 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
30970 if (fn
== BUILT_IN_LOGF
)
30971 strcpy (name
, "vmlsLn4");
30972 else if (fn
== BUILT_IN_LOG
)
30973 strcpy (name
, "vmldLn2");
30976 sprintf (name
, "vmls%s", bname
+10);
30977 name
[strlen (name
)-1] = '4';
30980 sprintf (name
, "vmld%s2", bname
+10);
30982 /* Convert to uppercase. */
30986 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
30988 args
= TREE_CHAIN (args
))
30992 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
30994 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
30996 /* Build a function declaration for the vectorized function. */
30997 new_fndecl
= build_decl (BUILTINS_LOCATION
,
30998 FUNCTION_DECL
, get_identifier (name
), fntype
);
30999 TREE_PUBLIC (new_fndecl
) = 1;
31000 DECL_EXTERNAL (new_fndecl
) = 1;
31001 DECL_IS_NOVOPS (new_fndecl
) = 1;
31002 TREE_READONLY (new_fndecl
) = 1;
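/* Illustrative aside (a standalone sketch, not part of this file): the SVML
   name mangling performed above, shown for the single-precision path under
   the assumption that the uppercasing step targets the first letter after
   the "vmls" prefix.  "__builtin_sinf" + 10 skips the "__builtin_" prefix
   and the trailing digit is the vector width, so sinf becomes "vmlsSin4"
   (logf is special-cased to "vmlsLn4").  */

#include <ctype.h>
#include <stdio.h>
#include <string.h>

static void
svml_sp_name (const char *bname, char *out)
{
  if (strcmp (bname, "__builtin_logf") == 0)
    strcpy (out, "vmlsLn4");
  else
    {
      sprintf (out, "vmls%s", bname + 10);
      out[strlen (out) - 1] = '4';
      out[4] = toupper (out[4]);
    }
}

int
main (void)
{
  char name[32];

  svml_sp_name ("__builtin_sinf", name);
  printf ("%s\n", name);   /* vmlsSin4 */
  return 0;
}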
31007 /* Handler for an ACML-style interface to
31008 a library with vectorized intrinsics. */
31011 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
31013 char name
[20] = "__vr.._";
31014 tree fntype
, new_fndecl
, args
;
31017 enum machine_mode el_mode
, in_mode
;
31020 /* The ACML is 64bits only and suitable for unsafe math only as
31021 it does not correctly support parts of IEEE with the required
31022 precision such as denormals. */
31024 || !flag_unsafe_math_optimizations
)
31027 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
31028 n
= TYPE_VECTOR_SUBPARTS (type_out
);
31029 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
31030 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
31031 if (el_mode
!= in_mode
31041 case BUILT_IN_LOG2
:
31042 case BUILT_IN_LOG10
:
31045 if (el_mode
!= DFmode
31050 case BUILT_IN_SINF
:
31051 case BUILT_IN_COSF
:
31052 case BUILT_IN_EXPF
:
31053 case BUILT_IN_POWF
:
31054 case BUILT_IN_LOGF
:
31055 case BUILT_IN_LOG2F
:
31056 case BUILT_IN_LOG10F
:
31059 if (el_mode
!= SFmode
31068 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
31069 sprintf (name
+ 7, "%s", bname
+10);
31072 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
31074 args
= TREE_CHAIN (args
))
31078 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
31080 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
31082 /* Build a function declaration for the vectorized function. */
31083 new_fndecl
= build_decl (BUILTINS_LOCATION
,
31084 FUNCTION_DECL
, get_identifier (name
), fntype
);
31085 TREE_PUBLIC (new_fndecl
) = 1;
31086 DECL_EXTERNAL (new_fndecl
) = 1;
31087 DECL_IS_NOVOPS (new_fndecl
) = 1;
31088 TREE_READONLY (new_fndecl
) = 1;
31093 /* Returns a decl of a function that implements gather load with
31094 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
31095 Return NULL_TREE if it is not available. */
31098 ix86_vectorize_builtin_gather (const_tree mem_vectype
,
31099 const_tree index_type
, int scale
)
31102 enum ix86_builtins code
;
31107 if ((TREE_CODE (index_type
) != INTEGER_TYPE
31108 && !POINTER_TYPE_P (index_type
))
31109 || (TYPE_MODE (index_type
) != SImode
31110 && TYPE_MODE (index_type
) != DImode
))
31113 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
31116 /* v*gather* insn sign extends index to pointer mode. */
31117 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
31118 && TYPE_UNSIGNED (index_type
))
31123 || (scale
& (scale
- 1)) != 0)
31126 si
= TYPE_MODE (index_type
) == SImode
;
31127 switch (TYPE_MODE (mem_vectype
))
31130 code
= si
? IX86_BUILTIN_GATHERSIV2DF
: IX86_BUILTIN_GATHERDIV2DF
;
31133 code
= si
? IX86_BUILTIN_GATHERALTSIV4DF
: IX86_BUILTIN_GATHERDIV4DF
;
31136 code
= si
? IX86_BUILTIN_GATHERSIV2DI
: IX86_BUILTIN_GATHERDIV2DI
;
31139 code
= si
? IX86_BUILTIN_GATHERALTSIV4DI
: IX86_BUILTIN_GATHERDIV4DI
;
31142 code
= si
? IX86_BUILTIN_GATHERSIV4SF
: IX86_BUILTIN_GATHERDIV4SF
;
31145 code
= si
? IX86_BUILTIN_GATHERSIV8SF
: IX86_BUILTIN_GATHERALTDIV8SF
;
31148 code
= si
? IX86_BUILTIN_GATHERSIV4SI
: IX86_BUILTIN_GATHERDIV4SI
;
31151 code
= si
? IX86_BUILTIN_GATHERSIV8SI
: IX86_BUILTIN_GATHERALTDIV8SI
;
31157 return ix86_builtins
[code
];
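/* Minimal standalone sketch (assumed shape: the dropped first half of the
   condition above bounds the value) of the scale test used by
   ix86_vectorize_builtin_gather: a gather scale is valid iff it is
   1, 2, 4 or 8, i.e. a positive power of two no larger than 8.  */

#include <stdbool.h>

static bool
valid_gather_scale (int scale)
{
  return scale > 0 && scale <= 8 && (scale & (scale - 1)) == 0;
}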
31160 /* Returns a code for a target-specific builtin that implements
31161 reciprocal of the function, or NULL_TREE if not available. */
31164 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
31165 bool sqrt ATTRIBUTE_UNUSED
)
31167 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
31168 && flag_finite_math_only
&& !flag_trapping_math
31169 && flag_unsafe_math_optimizations
))
31173 /* Machine dependent builtins. */
31176 /* Vectorized version of sqrt to rsqrt conversion. */
31177 case IX86_BUILTIN_SQRTPS_NR
:
31178 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
31180 case IX86_BUILTIN_SQRTPS_NR256
:
31181 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR256
];
31187 /* Normal builtins. */
31190 /* Sqrt to rsqrt conversion. */
31191 case BUILT_IN_SQRTF
:
31192 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
	 within the low 128-bit lane, but the high 128-bit lane must
	 mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
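/* Illustrative aside (a standalone sketch, not part of this file): the
   128-bit V4SF branch above packs each selector element into nelt/2 = 2
   bits of the immediate, so the permutation {2,3,0,1} becomes imm8 0x4e
   (and the function itself would report 0x4e + 1).  */

#include <stdio.h>

int
main (void)
{
  unsigned char sel[4] = { 2, 3, 0, 1 };
  unsigned int mask = 0, i, nelt = 4;

  for (i = 0; i < nelt; ++i)
    mask |= sel[i] << (i * (nelt / 2));

  printf ("imm8 = 0x%02x\n", mask);   /* prints imm8 = 0x4e */
  return 0;
}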
31274 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
31275 the expansion functions to turn the parallel back into a mask.
31276 The return value is 0 for no match and the imm8+1 for a match. */
31279 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
31281 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
31283 unsigned char ipar
[8];
31285 if (XVECLEN (par
, 0) != (int) nelt
)
31288 /* Validate that all of the elements are constants, and not totally
31289 out of range. Copy the data into an integral array to make the
31290 subsequent checks easier. */
31291 for (i
= 0; i
< nelt
; ++i
)
31293 rtx er
= XVECEXP (par
, 0, i
);
31294 unsigned HOST_WIDE_INT ei
;
31296 if (!CONST_INT_P (er
))
31299 if (ei
>= 2 * nelt
)
31304 /* Validate that the halves of the permute are halves. */
31305 for (i
= 0; i
< nelt2
- 1; ++i
)
31306 if (ipar
[i
] + 1 != ipar
[i
+ 1])
31308 for (i
= nelt2
; i
< nelt
- 1; ++i
)
31309 if (ipar
[i
] + 1 != ipar
[i
+ 1])
31312 /* Reconstruct the mask. */
31313 for (i
= 0; i
< 2; ++i
)
31315 unsigned e
= ipar
[i
* nelt2
];
31319 mask
|= e
<< (i
* 4);
31322 /* Make sure success has a non-zero value by adding one. */
31326 /* Store OPERAND to the memory after reload is completed. This means
31327 that we can't easily use assign_stack_local. */
31329 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
31333 gcc_assert (reload_completed
);
31334 if (ix86_using_red_zone ())
31336 result
= gen_rtx_MEM (mode
,
31337 gen_rtx_PLUS (Pmode
,
31339 GEN_INT (-RED_ZONE_SIZE
)));
31340 emit_move_insn (result
, operand
);
31342 else if (TARGET_64BIT
)
31348 operand
= gen_lowpart (DImode
, operand
);
31352 gen_rtx_SET (VOIDmode
,
31353 gen_rtx_MEM (DImode
,
31354 gen_rtx_PRE_DEC (DImode
,
31355 stack_pointer_rtx
)),
31359 gcc_unreachable ();
31361 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
31370 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
31372 gen_rtx_SET (VOIDmode
,
31373 gen_rtx_MEM (SImode
,
31374 gen_rtx_PRE_DEC (Pmode
,
31375 stack_pointer_rtx
)),
31378 gen_rtx_SET (VOIDmode
,
31379 gen_rtx_MEM (SImode
,
31380 gen_rtx_PRE_DEC (Pmode
,
31381 stack_pointer_rtx
)),
31386 /* Store HImodes as SImodes. */
31387 operand
= gen_lowpart (SImode
, operand
);
31391 gen_rtx_SET (VOIDmode
,
31392 gen_rtx_MEM (GET_MODE (operand
),
31393 gen_rtx_PRE_DEC (SImode
,
31394 stack_pointer_rtx
)),
31398 gcc_unreachable ();
31400 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
31405 /* Free operand from the memory. */
31407 ix86_free_from_memory (enum machine_mode mode
)
31409 if (!ix86_using_red_zone ())
31413 if (mode
== DImode
|| TARGET_64BIT
)
31417 /* Use LEA to deallocate stack space. In peephole2 it will be converted
31418 to pop or add instruction if registers are available. */
31419 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
31420 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
31425 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
31427 Put float CONST_DOUBLE in the constant pool instead of fp regs.
31428 QImode must go into class Q_REGS.
31429 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
31430 movdf to do mem-to-mem moves through integer regs. */
31433 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
31435 enum machine_mode mode
= GET_MODE (x
);
31437 /* We're only allowed to return a subclass of CLASS. Many of the
31438 following checks fail for NO_REGS, so eliminate that early. */
31439 if (regclass
== NO_REGS
)
31442 /* All classes can load zeros. */
31443 if (x
== CONST0_RTX (mode
))
31446 /* Force constants into memory if we are loading a (nonzero) constant into
31447 an MMX or SSE register. This is because there are no MMX/SSE instructions
31448 to load from a constant. */
31450 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
31453 /* Prefer SSE regs only, if we can use them for math. */
31454 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
31455 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
31457 /* Floating-point constants need more complex checks. */
31458 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
31460 /* General regs can load everything. */
31461 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
31464 /* Floats can load 0 and 1 plus some others. Note that we eliminated
31465 zero above. We only want to wind up preferring 80387 registers if
31466 we plan on doing computation with them. */
31468 && standard_80387_constant_p (x
) > 0)
31470 /* Limit class to non-sse. */
31471 if (regclass
== FLOAT_SSE_REGS
)
31473 if (regclass
== FP_TOP_SSE_REGS
)
31475 if (regclass
== FP_SECOND_SSE_REGS
)
31476 return FP_SECOND_REG
;
31477 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
31484 /* Generally when we see PLUS here, it's the function invariant
31485 (plus soft-fp const_int). Which can only be computed into general
31487 if (GET_CODE (x
) == PLUS
)
31488 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
31490 /* QImode constants are easy to load, but non-constant QImode data
31491 must go into Q_REGS. */
31492 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
31494 if (reg_class_subset_p (regclass
, Q_REGS
))
31496 if (reg_class_subset_p (Q_REGS
, regclass
))
31504 /* Discourage putting floating-point values in SSE registers unless
31505 SSE math is being used, and likewise for the 387 registers. */
31507 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
31509 enum machine_mode mode
= GET_MODE (x
);
31511 /* Restrict the output reload class to the register bank that we are doing
31512 math on. If we would like not to return a subset of CLASS, reject this
31513 alternative: if reload cannot do this, it will still use its choice. */
31514 mode
= GET_MODE (x
);
31515 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
31516 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
31518 if (X87_FLOAT_MODE_P (mode
))
31520 if (regclass
== FP_TOP_SSE_REGS
)
31522 else if (regclass
== FP_SECOND_SSE_REGS
)
31523 return FP_SECOND_REG
;
31525 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
31532 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
31533 enum machine_mode mode
, secondary_reload_info
*sri
)
31535 /* Double-word spills from general registers to non-offsettable memory
31536 references (zero-extended addresses) require special handling. */
31539 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
31540 && rclass
== GENERAL_REGS
31541 && !offsettable_memref_p (x
))
31544 ? CODE_FOR_reload_noff_load
31545 : CODE_FOR_reload_noff_store
);
31546 /* Add the cost of moving address to a temporary. */
31547 sri
->extra_cost
= 1;
31552 /* QImode spills from non-QI registers require
31553 intermediate register on 32bit targets. */
31555 && !in_p
&& mode
== QImode
31556 && (rclass
== GENERAL_REGS
31557 || rclass
== LEGACY_REGS
31558 || rclass
== INDEX_REGS
))
31567 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
31568 regno
= true_regnum (x
);
31570 /* Return Q_REGS if the operand is in memory. */
31575 /* This condition handles corner case where an expression involving
31576 pointers gets vectorized. We're trying to use the address of a
31577 stack slot as a vector initializer.
31579 (set (reg:V2DI 74 [ vect_cst_.2 ])
31580 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
31582 Eventually frame gets turned into sp+offset like this:
31584 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31585 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
31586 (const_int 392 [0x188]))))
31588 That later gets turned into:
31590 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31591 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
31592 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
31594 We'll have the following reload recorded:
31596 Reload 0: reload_in (DI) =
31597 (plus:DI (reg/f:DI 7 sp)
31598 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
31599 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31600 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
31601 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
31602 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31603 reload_reg_rtx: (reg:V2DI 22 xmm1)
31605 Which isn't going to work since SSE instructions can't handle scalar
31606 additions. Returning GENERAL_REGS forces the addition into integer
31607 register and reload can handle subsequent reloads without problems. */
31609 if (in_p
&& GET_CODE (x
) == PLUS
31610 && SSE_CLASS_P (rclass
)
31611 && SCALAR_INT_MODE_P (mode
))
31612 return GENERAL_REGS
;
31617 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
31620 ix86_class_likely_spilled_p (reg_class_t rclass
)
31631 case SSE_FIRST_REG
:
31633 case FP_SECOND_REG
:
31643 /* If we are copying between general and FP registers, we need a memory
31644 location. The same is true for SSE and MMX registers.
31646 To optimize register_move_cost performance, allow inline variant.
31648 The macro can't work reliably when one of the CLASSES is class containing
31649 registers from multiple units (SSE, MMX, integer). We avoid this by never
31650 combining those units in single alternative in the machine description.
31651 Ensure that this constraint holds to avoid unexpected surprises.
31653 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
31654 enforce these sanity checks. */
31657 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
31658 enum machine_mode mode
, int strict
)
31660 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
31661 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
31662 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
31663 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
31664 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
31665 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
31667 gcc_assert (!strict
);
31671 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
31674 /* ??? This is a lie. We do have moves between mmx/general, and for
31675 mmx/sse2. But by saying we need secondary memory we discourage the
31676 register allocator from using the mmx registers unless needed. */
31677 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
31680 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
31682 /* SSE1 doesn't have any direct moves from other classes. */
31686 /* If the target says that inter-unit moves are more expensive
31687 than moving through memory, then don't generate them. */
31688 if (!TARGET_INTER_UNIT_MOVES
)
31691 /* Between SSE and general, we have moves no larger than word size. */
31692 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
31700 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
31701 enum machine_mode mode
, int strict
)
31703 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
31706 /* Implement the TARGET_CLASS_MAX_NREGS hook.
31708 On the 80386, this is the size of MODE in words,
31709 except in the FP regs, where a single reg is always enough. */
31711 static unsigned char
31712 ix86_class_max_nregs (reg_class_t rclass
, enum machine_mode mode
)
31714 if (MAYBE_INTEGER_CLASS_P (rclass
))
31716 if (mode
== XFmode
)
31717 return (TARGET_64BIT
? 2 : 3);
31718 else if (mode
== XCmode
)
31719 return (TARGET_64BIT
? 4 : 6);
31721 return ((GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
);
31725 if (COMPLEX_MODE_P (mode
))
31732 /* Return true if the registers in CLASS cannot represent the change from
31733 modes FROM to TO. */
31736 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
31737 enum reg_class regclass
)
31742 /* x87 registers can't do subreg at all, as all values are reformatted
31743 to extended precision. */
31744 if (MAYBE_FLOAT_CLASS_P (regclass
))
31747 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
31749 /* Vector registers do not support QI or HImode loads. If we don't
31750 disallow a change to these modes, reload will assume it's ok to
31751 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
31752 the vec_dupv4hi pattern. */
31753 if (GET_MODE_SIZE (from
) < 4)
31756 /* Vector registers do not support subreg with nonzero offsets, which
31757 are otherwise valid for integer registers. Since we can't see
31758 whether we have a nonzero offset from here, prohibit all
31759 nonparadoxical subregs changing size. */
31760 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
31767 /* Return the cost of moving data of mode M between a
31768 register and memory. A value of 2 is the default; this cost is
31769 relative to those in `REGISTER_MOVE_COST'.
31771 This function is used extensively by register_move_cost that is used to
31772 build tables at startup. Make it inline in this case.
31773 When IN is 2, return maximum of in and out move cost.
31775 If moving between registers and memory is more expensive than
31776 between two registers, you should define this macro to express the
31779 Model also increased moving costs of QImode registers in non
31783 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
31787 if (FLOAT_CLASS_P (regclass
))
31805 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
31806 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
31808 if (SSE_CLASS_P (regclass
))
31811 switch (GET_MODE_SIZE (mode
))
31826 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
31827 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
31829 if (MMX_CLASS_P (regclass
))
31832 switch (GET_MODE_SIZE (mode
))
31844 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
31845 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
31847 switch (GET_MODE_SIZE (mode
))
31850 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
31853 return ix86_cost
->int_store
[0];
31854 if (TARGET_PARTIAL_REG_DEPENDENCY
31855 && optimize_function_for_speed_p (cfun
))
31856 cost
= ix86_cost
->movzbl_load
;
31858 cost
= ix86_cost
->int_load
[0];
31860 return MAX (cost
, ix86_cost
->int_store
[0]);
31866 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
31868 return ix86_cost
->movzbl_load
;
31870 return ix86_cost
->int_store
[0] + 4;
31875 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
31876 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
31878 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
31879 if (mode
== TFmode
)
31882 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
31884 cost
= ix86_cost
->int_load
[2];
31886 cost
= ix86_cost
->int_store
[2];
31887 return (cost
* (((int) GET_MODE_SIZE (mode
)
31888 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
31893 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
31896 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
                         reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (targetm.class_max_nregs (class1, mode)
          > targetm.class_max_nregs (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
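/* Illustrative sketch (not part of GCC): when a cross-class copy has to go
   through memory, its cost is modelled above as a store plus a load back,
   with flat penalties layered on top.  The hypothetical helper below mirrors
   that shape with plain integers; it is kept under "#if 0" so it does not
   affect the build.  */
#if 0
static int
example_cross_class_move_cost (int store_cost, int load_cost,
                               int src_wider_than_dst, int fp_mmx_overlap)
{
  int cost = 1 + store_cost + load_cost;
  if (src_wider_than_dst)   /* multiple stores, single load -> stall */
    cost += 20;
  if (fp_mmx_overlap)       /* x87 and MMX registers overlap */
    cost += 20;
  return cost;
}
#endif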
/* Return TRUE if hard register REGNO can hold a value of machine-mode
   MODE.  */

static bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
         out of SSE registers, even when no operation instructions
         are available.  OImode move is available only when AVX is
         enabled.  */
      return ((TARGET_AVX && mode == OImode)
              || VALID_AVX256_REG_MODE (mode)
              || VALID_SSE_REG_MODE (mode)
              || VALID_SSE2_REG_MODE (mode)
              || VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
         so if the register is available at all, then we can move data of
         the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
         but then they do cause partial register stalls.  */
      if (regno <= BX_REG || TARGET_64BIT)
        return true;
      if (!TARGET_PARTIAL_REG_STALL)
        return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integers and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return true;
  else if (VALID_FP_MODE_P (mode))
    return true;
  else if (VALID_DFP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

static bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 32
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 32
            && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
            && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
            && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
/* Return the cost of moving between two registers of mode MODE.  */

static int
ix86_set_reg_reg_cost (enum machine_mode mode)
{
  unsigned int units = UNITS_PER_WORD;

  switch (GET_MODE_CLASS (mode))
    {
    default:
      break;

    case MODE_CC:
      units = GET_MODE_SIZE (CCmode);
      break;

    case MODE_FLOAT:
      if ((TARGET_SSE && mode == TFmode)
          || (TARGET_80387 && mode == XFmode)
          || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
          || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
        units = GET_MODE_SIZE (mode);
      break;

    case MODE_COMPLEX_FLOAT:
      if ((TARGET_SSE && mode == TCmode)
          || (TARGET_80387 && mode == XCmode)
          || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
          || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
        units = GET_MODE_SIZE (mode);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      if ((TARGET_AVX && VALID_AVX256_REG_MODE (mode))
          || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
          || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
          || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
        units = GET_MODE_SIZE (mode);
      break;
    }

  /* Return the cost of moving between two registers of mode MODE,
     assuming that the move will be in pieces of at most UNITS bytes.  */
  return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
}
32141 /* Compute a (partial) cost for rtx X. Return true if the complete
32142 cost has been computed, and false if subexpressions should be
32143 scanned. In either case, *TOTAL contains the cost result. */
32146 ix86_rtx_costs (rtx x
, int code_i
, int outer_code_i
, int opno
, int *total
,
32149 enum rtx_code code
= (enum rtx_code
) code_i
;
32150 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
32151 enum machine_mode mode
= GET_MODE (x
);
32152 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
32157 if (register_operand (SET_DEST (x
), VOIDmode
)
32158 && reg_or_0_operand (SET_SRC (x
), VOIDmode
))
32160 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
32169 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
32171 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
32173 else if (flag_pic
&& SYMBOLIC_CONST (x
)
32175 || (!GET_CODE (x
) != LABEL_REF
32176 && (GET_CODE (x
) != SYMBOL_REF
32177 || !SYMBOL_REF_LOCAL_P (x
)))))
32184 if (mode
== VOIDmode
)
32189 switch (standard_80387_constant_p (x
))
32194 default: /* Other constants */
32201 if (SSE_FLOAT_MODE_P (mode
))
32204 switch (standard_sse_constant_p (x
))
32208 case 1: /* 0: xor eliminates false dependency */
32211 default: /* -1: cmp contains false dependency */
32216 /* Fall back to (MEM (SYMBOL_REF)), since that's where
32217 it'll probably end up. Add a penalty for size. */
32218 *total
= (COSTS_N_INSNS (1)
32219 + (flag_pic
!= 0 && !TARGET_64BIT
)
32220 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
      /* The zero extension is often completely free on x86_64, so make
         it as cheap as possible.  */
32226 if (TARGET_64BIT
&& mode
== DImode
32227 && GET_MODE (XEXP (x
, 0)) == SImode
)
32229 else if (TARGET_ZERO_EXTEND_WITH_AND
)
32230 *total
= cost
->add
;
32232 *total
= cost
->movzx
;
32236 *total
= cost
->movsx
;
32240 if (SCALAR_INT_MODE_P (mode
)
32241 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
32242 && CONST_INT_P (XEXP (x
, 1)))
32244 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
32247 *total
= cost
->add
;
32250 if ((value
== 2 || value
== 3)
32251 && cost
->lea
<= cost
->shift_const
)
32253 *total
= cost
->lea
;
32263 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
32265 /* ??? Should be SSE vector operation cost. */
32266 /* At least for published AMD latencies, this really is the same
32267 as the latency for a simple fpu operation like fabs. */
32268 /* V*QImode is emulated with 1-11 insns. */
32269 if (mode
== V16QImode
|| mode
== V32QImode
)
32272 if (TARGET_XOP
&& mode
== V16QImode
)
32274 /* For XOP we use vpshab, which requires a broadcast of the
32275 value to the variable shift insn. For constants this
32276 means a V16Q const in mem; even when we can perform the
32277 shift with one insn set the cost to prefer paddb. */
32278 if (CONSTANT_P (XEXP (x
, 1)))
32280 *total
= (cost
->fabs
32281 + rtx_cost (XEXP (x
, 0), code
, 0, speed
)
32282 + (speed
? 2 : COSTS_N_BYTES (16)));
32287 else if (TARGET_SSSE3
)
32289 *total
= cost
->fabs
* count
;
32292 *total
= cost
->fabs
;
32294 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
32296 if (CONST_INT_P (XEXP (x
, 1)))
32298 if (INTVAL (XEXP (x
, 1)) > 32)
32299 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
32301 *total
= cost
->shift_const
* 2;
32305 if (GET_CODE (XEXP (x
, 1)) == AND
)
32306 *total
= cost
->shift_var
* 2;
32308 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
32313 if (CONST_INT_P (XEXP (x
, 1)))
32314 *total
= cost
->shift_const
;
32316 *total
= cost
->shift_var
;
32324 gcc_assert (FLOAT_MODE_P (mode
));
32325 gcc_assert (TARGET_FMA
|| TARGET_FMA4
);
32327 /* ??? SSE scalar/vector cost should be used here. */
32328 /* ??? Bald assumption that fma has the same cost as fmul. */
32329 *total
= cost
->fmul
;
32330 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
32332 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
32334 if (GET_CODE (sub
) == NEG
)
32335 sub
= XEXP (sub
, 0);
32336 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
32339 if (GET_CODE (sub
) == NEG
)
32340 sub
= XEXP (sub
, 0);
32341 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
32346 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32348 /* ??? SSE scalar cost should be used here. */
32349 *total
= cost
->fmul
;
32352 else if (X87_FLOAT_MODE_P (mode
))
32354 *total
= cost
->fmul
;
32357 else if (FLOAT_MODE_P (mode
))
32359 /* ??? SSE vector cost should be used here. */
32360 *total
= cost
->fmul
;
32363 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
32365 /* V*QImode is emulated with 7-13 insns. */
32366 if (mode
== V16QImode
|| mode
== V32QImode
)
32369 if (TARGET_XOP
&& mode
== V16QImode
)
32371 else if (TARGET_SSSE3
)
32373 *total
= cost
->fmul
* 2 + cost
->fabs
* extra
;
32375 /* V*DImode is emulated with 5-8 insns. */
32376 else if (mode
== V2DImode
|| mode
== V4DImode
)
32378 if (TARGET_XOP
&& mode
== V2DImode
)
32379 *total
= cost
->fmul
* 2 + cost
->fabs
* 3;
32381 *total
= cost
->fmul
* 3 + cost
->fabs
* 5;
32383 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
32384 insns, including two PMULUDQ. */
32385 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
32386 *total
= cost
->fmul
* 2 + cost
->fabs
* 5;
32388 *total
= cost
->fmul
;
32393 rtx op0
= XEXP (x
, 0);
32394 rtx op1
= XEXP (x
, 1);
32396 if (CONST_INT_P (XEXP (x
, 1)))
32398 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
32399 for (nbits
= 0; value
!= 0; value
&= value
- 1)
32403 /* This is arbitrary. */
32406 /* Compute costs correctly for widening multiplication. */
32407 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
32408 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
32409 == GET_MODE_SIZE (mode
))
32411 int is_mulwiden
= 0;
32412 enum machine_mode inner_mode
= GET_MODE (op0
);
32414 if (GET_CODE (op0
) == GET_CODE (op1
))
32415 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
32416 else if (CONST_INT_P (op1
))
32418 if (GET_CODE (op0
) == SIGN_EXTEND
)
32419 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
32422 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
32426 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
32429 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
32430 + nbits
* cost
->mult_bit
32431 + rtx_cost (op0
, outer_code
, opno
, speed
)
32432 + rtx_cost (op1
, outer_code
, opno
, speed
));
32441 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32442 /* ??? SSE cost should be used here. */
32443 *total
= cost
->fdiv
;
32444 else if (X87_FLOAT_MODE_P (mode
))
32445 *total
= cost
->fdiv
;
32446 else if (FLOAT_MODE_P (mode
))
32447 /* ??? SSE vector cost should be used here. */
32448 *total
= cost
->fdiv
;
32450 *total
= cost
->divide
[MODE_INDEX (mode
)];
32454 if (GET_MODE_CLASS (mode
) == MODE_INT
32455 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
32457 if (GET_CODE (XEXP (x
, 0)) == PLUS
32458 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
32459 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
32460 && CONSTANT_P (XEXP (x
, 1)))
32462 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
32463 if (val
== 2 || val
== 4 || val
== 8)
32465 *total
= cost
->lea
;
32466 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
32467 outer_code
, opno
, speed
);
32468 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
32469 outer_code
, opno
, speed
);
32470 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
32474 else if (GET_CODE (XEXP (x
, 0)) == MULT
32475 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
32477 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
32478 if (val
== 2 || val
== 4 || val
== 8)
32480 *total
= cost
->lea
;
32481 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
32482 outer_code
, opno
, speed
);
32483 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
32487 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
32489 *total
= cost
->lea
;
32490 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
32491 outer_code
, opno
, speed
);
32492 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
32493 outer_code
, opno
, speed
);
32494 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
32501 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32503 /* ??? SSE cost should be used here. */
32504 *total
= cost
->fadd
;
32507 else if (X87_FLOAT_MODE_P (mode
))
32509 *total
= cost
->fadd
;
32512 else if (FLOAT_MODE_P (mode
))
32514 /* ??? SSE vector cost should be used here. */
32515 *total
= cost
->fadd
;
32523 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
32525 *total
= (cost
->add
* 2
32526 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
32527 << (GET_MODE (XEXP (x
, 0)) != DImode
))
32528 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
32529 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
32535 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32537 /* ??? SSE cost should be used here. */
32538 *total
= cost
->fchs
;
32541 else if (X87_FLOAT_MODE_P (mode
))
32543 *total
= cost
->fchs
;
32546 else if (FLOAT_MODE_P (mode
))
32548 /* ??? SSE vector cost should be used here. */
32549 *total
= cost
->fchs
;
32555 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
32557 /* ??? Should be SSE vector operation cost. */
32558 /* At least for published AMD latencies, this really is the same
32559 as the latency for a simple fpu operation like fabs. */
32560 *total
= cost
->fabs
;
32562 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
32563 *total
= cost
->add
* 2;
32565 *total
= cost
->add
;
32569 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
32570 && XEXP (XEXP (x
, 0), 1) == const1_rtx
32571 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
32572 && XEXP (x
, 1) == const0_rtx
)
32574 /* This kind of construct is implemented using test[bwl].
32575 Treat it as if we had an AND. */
32576 *total
= (cost
->add
32577 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
32578 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
32584 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
32589 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32590 /* ??? SSE cost should be used here. */
32591 *total
= cost
->fabs
;
32592 else if (X87_FLOAT_MODE_P (mode
))
32593 *total
= cost
->fabs
;
32594 else if (FLOAT_MODE_P (mode
))
32595 /* ??? SSE vector cost should be used here. */
32596 *total
= cost
->fabs
;
32600 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32601 /* ??? SSE cost should be used here. */
32602 *total
= cost
->fsqrt
;
32603 else if (X87_FLOAT_MODE_P (mode
))
32604 *total
= cost
->fsqrt
;
32605 else if (FLOAT_MODE_P (mode
))
32606 /* ??? SSE vector cost should be used here. */
32607 *total
= cost
->fsqrt
;
32611 if (XINT (x
, 1) == UNSPEC_TP
)
32618 case VEC_DUPLICATE
:
32619 /* ??? Assume all of these vector manipulation patterns are
32620 recognizable. In which case they all pretty much have the
32622 *total
= cost
->fabs
;
32632 static int current_machopic_label_num
;
32634 /* Given a symbol name and its associated stub, write out the
32635 definition of the stub. */
32638 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
32640 unsigned int length
;
32641 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
32642 int label
= ++current_machopic_label_num
;
32644 /* For 64-bit we shouldn't get here. */
32645 gcc_assert (!TARGET_64BIT
);
32647 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
32648 symb
= targetm
.strip_name_encoding (symb
);
32650 length
= strlen (stub
);
32651 binder_name
= XALLOCAVEC (char, length
+ 32);
32652 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
32654 length
= strlen (symb
);
32655 symbol_name
= XALLOCAVEC (char, length
+ 32);
32656 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
32658 sprintf (lazy_ptr_name
, "L%d$lz", label
);
32660 if (MACHOPIC_ATT_STUB
)
32661 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
32662 else if (MACHOPIC_PURE
)
32663 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
32665 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
32667 fprintf (file
, "%s:\n", stub
);
32668 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
32670 if (MACHOPIC_ATT_STUB
)
32672 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
32674 else if (MACHOPIC_PURE
)
32677 /* 25-byte PIC stub using "CALL get_pc_thunk". */
32678 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
32679 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
32680 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
32681 label
, lazy_ptr_name
, label
);
32682 fprintf (file
, "\tjmp\t*%%ecx\n");
32685 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
32687 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
32688 it needs no stub-binding-helper. */
32689 if (MACHOPIC_ATT_STUB
)
32692 fprintf (file
, "%s:\n", binder_name
);
32696 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
32697 fprintf (file
, "\tpushl\t%%ecx\n");
32700 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
32702 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
32704 /* N.B. Keep the correspondence of these
32705 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
32706 old-pic/new-pic/non-pic stubs; altering this will break
32707 compatibility with existing dylibs. */
32710 /* 25-byte PIC stub using "CALL get_pc_thunk". */
32711 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
32714 /* 16-byte -mdynamic-no-pic stub. */
32715 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
32717 fprintf (file
, "%s:\n", lazy_ptr_name
);
32718 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
32719 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
32721 #endif /* TARGET_MACHO */
32723 /* Order the registers for register allocator. */
32726 x86_order_regs_for_local_alloc (void)
32731 /* First allocate the local general purpose registers. */
32732 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
32733 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
32734 reg_alloc_order
[pos
++] = i
;
32736 /* Global general purpose registers. */
32737 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
32738 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
32739 reg_alloc_order
[pos
++] = i
;
  /* x87 registers come first in case we are doing FP math
     using them.  */
32743 if (!TARGET_SSE_MATH
)
32744 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
32745 reg_alloc_order
[pos
++] = i
;
32747 /* SSE registers. */
32748 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
32749 reg_alloc_order
[pos
++] = i
;
32750 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
32751 reg_alloc_order
[pos
++] = i
;
32753 /* x87 registers. */
32754 if (TARGET_SSE_MATH
)
32755 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
32756 reg_alloc_order
[pos
++] = i
;
32758 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
32759 reg_alloc_order
[pos
++] = i
;
32761 /* Initialize the rest of array as we do not allocate some registers
32763 while (pos
< FIRST_PSEUDO_REGISTER
)
32764 reg_alloc_order
[pos
++] = 0;
32767 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
32768 in struct attribute_spec handler. */
32770 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
32772 int flags ATTRIBUTE_UNUSED
,
32773 bool *no_add_attrs
)
32775 if (TREE_CODE (*node
) != FUNCTION_TYPE
32776 && TREE_CODE (*node
) != METHOD_TYPE
32777 && TREE_CODE (*node
) != FIELD_DECL
32778 && TREE_CODE (*node
) != TYPE_DECL
)
32780 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
32782 *no_add_attrs
= true;
32787 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
32789 *no_add_attrs
= true;
32792 if (is_attribute_p ("callee_pop_aggregate_return", name
))
32796 cst
= TREE_VALUE (args
);
32797 if (TREE_CODE (cst
) != INTEGER_CST
)
32799 warning (OPT_Wattributes
,
32800 "%qE attribute requires an integer constant argument",
32802 *no_add_attrs
= true;
32804 else if (compare_tree_int (cst
, 0) != 0
32805 && compare_tree_int (cst
, 1) != 0)
32807 warning (OPT_Wattributes
,
32808 "argument to %qE attribute is neither zero, nor one",
32810 *no_add_attrs
= true;
32819 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
32820 struct attribute_spec.handler. */
32822 ix86_handle_abi_attribute (tree
*node
, tree name
,
32823 tree args ATTRIBUTE_UNUSED
,
32824 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
32826 if (TREE_CODE (*node
) != FUNCTION_TYPE
32827 && TREE_CODE (*node
) != METHOD_TYPE
32828 && TREE_CODE (*node
) != FIELD_DECL
32829 && TREE_CODE (*node
) != TYPE_DECL
)
32831 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
32833 *no_add_attrs
= true;
32837 /* Can combine regparm with all attributes but fastcall. */
32838 if (is_attribute_p ("ms_abi", name
))
32840 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
32842 error ("ms_abi and sysv_abi attributes are not compatible");
32847 else if (is_attribute_p ("sysv_abi", name
))
32849 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
32851 error ("ms_abi and sysv_abi attributes are not compatible");
32860 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32861 struct attribute_spec.handler. */
32863 ix86_handle_struct_attribute (tree
*node
, tree name
,
32864 tree args ATTRIBUTE_UNUSED
,
32865 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
32868 if (DECL_P (*node
))
32870 if (TREE_CODE (*node
) == TYPE_DECL
)
32871 type
= &TREE_TYPE (*node
);
32876 if (!(type
&& RECORD_OR_UNION_TYPE_P (*type
)))
32878 warning (OPT_Wattributes
, "%qE attribute ignored",
32880 *no_add_attrs
= true;
32883 else if ((is_attribute_p ("ms_struct", name
)
32884 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
32885 || ((is_attribute_p ("gcc_struct", name
)
32886 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
32888 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
32890 *no_add_attrs
= true;
32897 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
32898 tree args ATTRIBUTE_UNUSED
,
32899 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
32901 if (TREE_CODE (*node
) != FUNCTION_DECL
)
32903 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
32905 *no_add_attrs
= true;
32911 ix86_ms_bitfield_layout_p (const_tree record_type
)
32913 return ((TARGET_MS_BITFIELD_LAYOUT
32914 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
32915 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
32918 /* Returns an expression indicating where the this parameter is
32919 located on entry to the FUNCTION. */
32922 x86_this_parameter (tree function
)
32924 tree type
= TREE_TYPE (function
);
32925 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
32930 const int *parm_regs
;
32932 if (ix86_function_type_abi (type
) == MS_ABI
)
32933 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
32935 parm_regs
= x86_64_int_parameter_registers
;
32936 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
32939 nregs
= ix86_function_regparm (type
, function
);
32941 if (nregs
> 0 && !stdarg_p (type
))
32944 unsigned int ccvt
= ix86_get_callcvt (type
);
32946 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
32947 regno
= aggr
? DX_REG
: CX_REG
;
32948 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
32952 return gen_rtx_MEM (SImode
,
32953 plus_constant (Pmode
, stack_pointer_rtx
, 4));
32962 return gen_rtx_MEM (SImode
,
32963 plus_constant (Pmode
,
32964 stack_pointer_rtx
, 4));
32967 return gen_rtx_REG (SImode
, regno
);
32970 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
32974 /* Determine whether x86_output_mi_thunk can succeed. */
32977 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
32978 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
32979 HOST_WIDE_INT vcall_offset
, const_tree function
)
32981 /* 64-bit can handle anything. */
32985 /* For 32-bit, everything's fine if we have one free register. */
32986 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
32989 /* Need a free register for vcall_offset. */
32993 /* Need a free register for GOT references. */
32994 if (flag_pic
&& !targetm
.binds_local_p (function
))
32997 /* Otherwise ok. */
33001 /* Output the assembler code for a thunk function. THUNK_DECL is the
33002 declaration for the thunk function itself, FUNCTION is the decl for
33003 the target function. DELTA is an immediate constant offset to be
33004 added to THIS. If VCALL_OFFSET is nonzero, the word at
33005 *(*this + vcall_offset) should be added to THIS. */
33008 x86_output_mi_thunk (FILE *file
,
33009 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
33010 HOST_WIDE_INT vcall_offset
, tree function
)
33012 rtx this_param
= x86_this_parameter (function
);
33013 rtx this_reg
, tmp
, fnaddr
;
33014 unsigned int tmp_regno
;
33017 tmp_regno
= R10_REG
;
33020 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
33021 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) != 0)
33022 tmp_regno
= AX_REG
;
33024 tmp_regno
= CX_REG
;
33027 emit_note (NOTE_INSN_PROLOGUE_END
);
33029 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
33030 pull it in now and let DELTA benefit. */
33031 if (REG_P (this_param
))
33032 this_reg
= this_param
;
33033 else if (vcall_offset
)
33035 /* Put the this parameter into %eax. */
33036 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
33037 emit_move_insn (this_reg
, this_param
);
33040 this_reg
= NULL_RTX
;
33042 /* Adjust the this parameter by a fixed constant. */
33045 rtx delta_rtx
= GEN_INT (delta
);
33046 rtx delta_dst
= this_reg
? this_reg
: this_param
;
33050 if (!x86_64_general_operand (delta_rtx
, Pmode
))
33052 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
33053 emit_move_insn (tmp
, delta_rtx
);
33058 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
33061 /* Adjust the this parameter by a value stored in the vtable. */
33064 rtx vcall_addr
, vcall_mem
, this_mem
;
33066 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
33068 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
33069 if (Pmode
!= ptr_mode
)
33070 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
33071 emit_move_insn (tmp
, this_mem
);
33073 /* Adjust the this parameter. */
33074 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
33076 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
33078 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
33079 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
33080 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
33083 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
33084 if (Pmode
!= ptr_mode
)
33085 emit_insn (gen_addsi_1_zext (this_reg
,
33086 gen_rtx_REG (ptr_mode
,
33090 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
33093 /* If necessary, drop THIS back to its stack slot. */
33094 if (this_reg
&& this_reg
!= this_param
)
33095 emit_move_insn (this_param
, this_reg
);
33097 fnaddr
= XEXP (DECL_RTL (function
), 0);
33100 if (!flag_pic
|| targetm
.binds_local_p (function
)
33101 || cfun
->machine
->call_abi
== MS_ABI
)
33105 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
33106 tmp
= gen_rtx_CONST (Pmode
, tmp
);
33107 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
33112 if (!flag_pic
|| targetm
.binds_local_p (function
))
33115 else if (TARGET_MACHO
)
33117 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
33118 fnaddr
= XEXP (fnaddr
, 0);
33120 #endif /* TARGET_MACHO */
33123 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
33124 output_set_got (tmp
, NULL_RTX
);
33126 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
33127 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
33128 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
33132 /* Our sibling call patterns do not allow memories, because we have no
33133 predicate that can distinguish between frame and non-frame memory.
33134 For our purposes here, we can get away with (ab)using a jump pattern,
33135 because we're going to do no optimization. */
33136 if (MEM_P (fnaddr
))
33137 emit_jump_insn (gen_indirect_jump (fnaddr
));
33140 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
33141 fnaddr
= legitimize_pic_address (fnaddr
,
33142 gen_rtx_REG (Pmode
, tmp_regno
));
33144 if (!sibcall_insn_operand (fnaddr
, word_mode
))
33146 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
33147 if (GET_MODE (fnaddr
) != word_mode
)
33148 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
33149 emit_move_insn (tmp
, fnaddr
);
33153 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
33154 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
33155 tmp
= emit_call_insn (tmp
);
33156 SIBLING_CALL_P (tmp
) = 1;
33160 /* Emit just enough of rest_of_compilation to get the insns emitted.
33161 Note that use_thunk calls assemble_start_function et al. */
33162 tmp
= get_insns ();
33163 insn_locators_alloc ();
33164 shorten_branches (tmp
);
33165 final_start_function (tmp
, file
, 1);
33166 final (tmp
, file
, 1);
33167 final_end_function ();
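/* Illustrative sketch (not part of GCC): in source terms, the thunk emitted
   above adds DELTA to the incoming "this" pointer, optionally adds the word
   stored at *(*this + VCALL_OFFSET), and then tail-calls the real function.
   The hypothetical helper below shows only the pointer adjustment in plain
   C; it is kept under "#if 0" so it does not affect the build.  */
#if 0
static void *
example_thunk_this_adjust (void *this_ptr, long delta, long vcall_offset)
{
  char *p = (char *) this_ptr + delta;
  if (vcall_offset)
    {
      /* *this points at the vtable; add the value found at VCALL_OFFSET.  */
      char *vtable = *(char **) p;
      p += *(long *) (vtable + vcall_offset);
    }
  return p;   /* the thunk then jumps to the target function */
}
#endif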
33171 x86_file_start (void)
33173 default_file_start ();
33175 darwin_file_start ();
33177 if (X86_FILE_START_VERSION_DIRECTIVE
)
33178 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
33179 if (X86_FILE_START_FLTUSED
)
33180 fputs ("\t.global\t__fltused\n", asm_out_file
);
33181 if (ix86_asm_dialect
== ASM_INTEL
)
33182 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
33186 x86_field_alignment (tree field
, int computed
)
33188 enum machine_mode mode
;
33189 tree type
= TREE_TYPE (field
);
33191 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
33193 mode
= TYPE_MODE (strip_array_types (type
));
33194 if (mode
== DFmode
|| mode
== DCmode
33195 || GET_MODE_CLASS (mode
) == MODE_INT
33196 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
33197 return MIN (32, computed
);
33201 /* Output assembler code to FILE to increment profiler label # LABELNO
33202 for profiling a function entry. */
33204 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
33206 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
33211 #ifndef NO_PROFILE_COUNTERS
33212 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
33215 if (DEFAULT_ABI
== SYSV_ABI
&& flag_pic
)
33216 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
33218 fprintf (file
, "\tcall\t%s\n", mcount_name
);
33222 #ifndef NO_PROFILE_COUNTERS
33223 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
33226 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
33230 #ifndef NO_PROFILE_COUNTERS
33231 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
33234 fprintf (file
, "\tcall\t%s\n", mcount_name
);
33238 /* We don't have exact information about the insn sizes, but we may assume
33239 quite safely that we are informed about all 1 byte insns and memory
33240 address sizes. This is enough to eliminate unnecessary padding in
33244 min_insn_size (rtx insn
)
33248 if (!INSN_P (insn
) || !active_insn_p (insn
))
  /* Discard alignments we've emitted, and jump instructions.  */
33252 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
33253 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
33255 if (JUMP_TABLE_DATA_P (insn
))
  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
33261 && symbolic_reference_mentioned_p (PATTERN (insn
))
33262 && !SIBLING_CALL_P (insn
))
33264 len
= get_attr_length (insn
);
33268 /* For normal instructions we rely on get_attr_length being exact,
33269 with a few exceptions. */
33270 if (!JUMP_P (insn
))
33272 enum attr_type type
= get_attr_type (insn
);
33277 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
33278 || asm_noperands (PATTERN (insn
)) >= 0)
33285 /* Otherwise trust get_attr_length. */
33289 l
= get_attr_length_address (insn
);
33290 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
33299 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
33307 rtx insn
, start
= get_insns ();
33308 int nbytes
= 0, njumps
= 0;
33311 /* Look for all minimal intervals of instructions containing 4 jumps.
33312 The intervals are bounded by START and INSN. NBYTES is the total
33313 size of instructions in the interval including INSN and not including
33314 START. When the NBYTES is smaller than 16 bytes, it is possible
33315 that the end of START and INSN ends up in the same 16byte page.
33317 The smallest offset in the page INSN can start is the case where START
33318 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
33319 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
33321 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
33325 if (LABEL_P (insn
))
33327 int align
= label_to_alignment (insn
);
33328 int max_skip
= label_to_max_skip (insn
);
33332 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
33333 already in the current 16 byte page, because otherwise
33334 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
33335 bytes to reach 16 byte boundary. */
33337 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
33340 fprintf (dump_file
, "Label %i with max_skip %i\n",
33341 INSN_UID (insn
), max_skip
);
33344 while (nbytes
+ max_skip
>= 16)
33346 start
= NEXT_INSN (start
);
33347 if ((JUMP_P (start
)
33348 && GET_CODE (PATTERN (start
)) != ADDR_VEC
33349 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
33351 njumps
--, isjump
= 1;
33354 nbytes
-= min_insn_size (start
);
33360 min_size
= min_insn_size (insn
);
33361 nbytes
+= min_size
;
33363 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
33364 INSN_UID (insn
), min_size
);
33366 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
33367 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
33375 start
= NEXT_INSN (start
);
33376 if ((JUMP_P (start
)
33377 && GET_CODE (PATTERN (start
)) != ADDR_VEC
33378 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
33380 njumps
--, isjump
= 1;
33383 nbytes
-= min_insn_size (start
);
33385 gcc_assert (njumps
>= 0);
33387 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
33388 INSN_UID (start
), INSN_UID (insn
), nbytes
);
33390 if (njumps
== 3 && isjump
&& nbytes
< 16)
33392 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
33395 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
33396 INSN_UID (insn
), padsize
);
33397 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
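/* Illustrative sketch (not part of GCC): the pass above slides a window over
   the instruction stream and pads whenever a fourth jump would land in the
   same 16-byte region as the previous three.  The hypothetical helper below
   shows the same test on plain arrays of minimum instruction sizes and jump
   flags; it is kept under "#if 0" so it does not affect the build.  */
#if 0
static int
example_four_jumps_in_16_bytes (const int *size, const int *is_jump,
                                int n_insns)
{
  int i, j;
  for (i = 0; i < n_insns; i++)
    if (is_jump[i])
      {
        /* Walk backwards from jump I, counting jumps until the bytes
           spanned by the instructions exceed 16.  */
        int bytes = 0, jumps = 0;
        for (j = i; j >= 0 && bytes + size[j] <= 16; j--)
          {
            bytes += size[j];
            if (is_jump[j] && ++jumps >= 4)
              return 1;
          }
      }
  return 0;
}
#endif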
33403 /* AMD Athlon works faster
33404 when RET is not destination of conditional jump or directly preceded
33405 by other jump instruction. We avoid the penalty by inserting NOP just
33406 before the RET instructions in such cases. */
33408 ix86_pad_returns (void)
33413 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
33415 basic_block bb
= e
->src
;
33416 rtx ret
= BB_END (bb
);
33418 bool replace
= false;
33420 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
33421 || optimize_bb_for_size_p (bb
))
33423 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
33424 if (active_insn_p (prev
) || LABEL_P (prev
))
33426 if (prev
&& LABEL_P (prev
))
33431 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
33432 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
33433 && !(e
->flags
& EDGE_FALLTHRU
))
33438 prev
= prev_active_insn (ret
);
33440 && ((JUMP_P (prev
) && any_condjump_p (prev
))
33443 /* Empty functions get branch mispredict even when
33444 the jump destination is not visible to us. */
33445 if (!prev
&& !optimize_function_for_size_p (cfun
))
33450 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
33456 /* Count the minimum number of instructions in BB. Return 4 if the
33457 number of instructions >= 4. */
33460 ix86_count_insn_bb (basic_block bb
)
33463 int insn_count
= 0;
33465 /* Count number of instructions in this block. Return 4 if the number
33466 of instructions >= 4. */
33467 FOR_BB_INSNS (bb
, insn
)
      /* This only happens in exit blocks.  */
33471 && ANY_RETURN_P (PATTERN (insn
)))
33474 if (NONDEBUG_INSN_P (insn
)
33475 && GET_CODE (PATTERN (insn
)) != USE
33476 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
33479 if (insn_count
>= 4)
33488 /* Count the minimum number of instructions in code path in BB.
33489 Return 4 if the number of instructions >= 4. */
33492 ix86_count_insn (basic_block bb
)
33496 int min_prev_count
;
33498 /* Only bother counting instructions along paths with no
33499 more than 2 basic blocks between entry and exit. Given
33500 that BB has an edge to exit, determine if a predecessor
33501 of BB has an edge from entry. If so, compute the number
33502 of instructions in the predecessor block. If there
33503 happen to be multiple such blocks, compute the minimum. */
33504 min_prev_count
= 4;
33505 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
33508 edge_iterator prev_ei
;
33510 if (e
->src
== ENTRY_BLOCK_PTR
)
33512 min_prev_count
= 0;
33515 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
33517 if (prev_e
->src
== ENTRY_BLOCK_PTR
)
33519 int count
= ix86_count_insn_bb (e
->src
);
33520 if (count
< min_prev_count
)
33521 min_prev_count
= count
;
33527 if (min_prev_count
< 4)
33528 min_prev_count
+= ix86_count_insn_bb (bb
);
33530 return min_prev_count
;
33533 /* Pad short function to 4 instructions. */
33536 ix86_pad_short_function (void)
33541 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
33543 rtx ret
= BB_END (e
->src
);
33544 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
33546 int insn_count
= ix86_count_insn (e
->src
);
33548 /* Pad short function. */
33549 if (insn_count
< 4)
33553 /* Find epilogue. */
33556 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
33557 insn
= PREV_INSN (insn
);
33562 /* Two NOPs count as one instruction. */
33563 insn_count
= 2 * (4 - insn_count
);
33564 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
33570 /* Implement machine specific optimizations. We implement padding of returns
33571 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
33575 /* We are freeing block_for_insn in the toplev to keep compatibility
33576 with old MDEP_REORGS that are not CFG based. Recompute it now. */
33577 compute_bb_for_insn ();
33579 /* Run the vzeroupper optimization if needed. */
33580 if (TARGET_VZEROUPPER
)
33581 move_or_delete_vzeroupper ();
33583 if (optimize
&& optimize_function_for_speed_p (cfun
))
33585 if (TARGET_PAD_SHORT_FUNCTION
)
33586 ix86_pad_short_function ();
33587 else if (TARGET_PAD_RETURNS
)
33588 ix86_pad_returns ();
33589 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
33590 if (TARGET_FOUR_JUMP_LIMIT
)
33591 ix86_avoid_jump_mispredicts ();
33596 /* Return nonzero when QImode register that must be represented via REX prefix
33599 x86_extended_QIreg_mentioned_p (rtx insn
)
33602 extract_insn_cached (insn
);
33603 for (i
= 0; i
< recog_data
.n_operands
; i
++)
33604 if (REG_P (recog_data
.operand
[i
])
33605 && REGNO (recog_data
.operand
[i
]) > BX_REG
)
33610 /* Return nonzero when P points to register encoded via REX prefix.
33611 Called via for_each_rtx. */
33613 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
33615 unsigned int regno
;
33618 regno
= REGNO (*p
);
33619 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
33622 /* Return true when INSN mentions register that must be encoded using REX
33625 x86_extended_reg_mentioned_p (rtx insn
)
33627 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
33628 extended_reg_mentioned_1
, NULL
);
33631 /* If profitable, negate (without causing overflow) integer constant
33632 of mode MODE at location LOC. Return true in this case. */
33634 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
33638 if (!CONST_INT_P (*loc
))
33644 /* DImode x86_64 constants must fit in 32 bits. */
33645 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
33656 gcc_unreachable ();
33659 /* Avoid overflows. */
33660 if (mode_signbit_p (mode
, *loc
))
33663 val
= INTVAL (*loc
);
33665 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
33666 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
33667 if ((val
< 0 && val
!= -128)
33670 *loc
= GEN_INT (-val
);
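/* Illustrative sketch (not part of GCC): the transformation above prefers
   "sub $4" over "add $-4" for readability, except that -128 fits in a
   sign-extended 8-bit immediate while +128 does not, so there the sign and
   the operation are swapped the other way.  The hypothetical helper below
   shows the encoding-size rule; it is kept under "#if 0" so it does not
   affect the build.  */
#if 0
static int
example_imm_fits_imm8 (long imm)
{
  /* x86 add/sub have a short form with a sign-extended 8-bit immediate.  */
  return imm >= -128 && imm <= 127;
}
/* example_imm_fits_imm8 (-128) holds but example_imm_fits_imm8 (128) does
   not, which is why "add $-128" is kept rather than rewritten to
   "sub $128".  */
#endif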
33677 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
33678 optabs would emit if we didn't have TFmode patterns. */
33681 x86_emit_floatuns (rtx operands
[2])
33683 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
33684 enum machine_mode mode
, inmode
;
33686 inmode
= GET_MODE (operands
[1]);
33687 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
33690 in
= force_reg (inmode
, operands
[1]);
33691 mode
= GET_MODE (out
);
33692 neglab
= gen_label_rtx ();
33693 donelab
= gen_label_rtx ();
33694 f0
= gen_reg_rtx (mode
);
33696 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
33698 expand_float (out
, in
, 0);
33700 emit_jump_insn (gen_jump (donelab
));
33703 emit_label (neglab
);
33705 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
33707 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
33709 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
33711 expand_float (f0
, i0
, 0);
33713 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
33715 emit_label (donelab
);
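/* Illustrative sketch (not part of GCC): the expander above uses the classic
   unsigned-to-float trick.  Values with the top bit clear convert directly
   through the signed path; values with the top bit set are halved (shifting
   right by one and OR-ing the lost low bit back in so rounding stays
   correct), converted, and then doubled.  The hypothetical helper below
   shows the same idea for a 64-bit unsigned integer; it is kept under
   "#if 0" so it does not affect the build.  */
#if 0
static double
example_uint64_to_double (unsigned long long u)
{
  if ((long long) u >= 0)
    return (double) (long long) u;              /* fits the signed path */
  else
    {
      unsigned long long half = (u >> 1) | (u & 1);
      double d = (double) (long long) half;     /* now non-negative */
      return d + d;
    }
}
#endif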
33718 /* AVX2 does support 32-byte integer vector operations,
33719 thus the longest vector we are faced with is V32QImode. */
33720 #define MAX_VECT_LEN 32
33722 struct expand_vec_perm_d
33724 rtx target
, op0
, op1
;
33725 unsigned char perm
[MAX_VECT_LEN
];
33726 enum machine_mode vmode
;
33727 unsigned char nelt
;
33728 bool one_operand_p
;
33732 static bool canonicalize_perm (struct expand_vec_perm_d
*d
);
33733 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
33734 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
33736 /* Get a vector mode of the same size as the original but with elements
33737 twice as wide. This is only guaranteed to apply to integral vectors. */
33739 static inline enum machine_mode
33740 get_mode_wider_vector (enum machine_mode o
)
33742 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
33743 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
33744 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
33745 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
33749 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33750 with all elements equal to VAR. Return true if successful. */
33753 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
33754 rtx target
, rtx val
)
33777 /* First attempt to recognize VAL as-is. */
33778 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
33779 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
33780 if (recog_memoized (insn
) < 0)
33783 /* If that fails, force VAL into a register. */
33786 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
33787 seq
= get_insns ();
33790 emit_insn_before (seq
, insn
);
33792 ok
= recog_memoized (insn
) >= 0;
33801 if (TARGET_SSE
|| TARGET_3DNOW_A
)
33805 val
= gen_lowpart (SImode
, val
);
33806 x
= gen_rtx_TRUNCATE (HImode
, val
);
33807 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
33808 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
33821 struct expand_vec_perm_d dperm
;
33825 memset (&dperm
, 0, sizeof (dperm
));
33826 dperm
.target
= target
;
33827 dperm
.vmode
= mode
;
33828 dperm
.nelt
= GET_MODE_NUNITS (mode
);
33829 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
33830 dperm
.one_operand_p
= true;
33832 /* Extend to SImode using a paradoxical SUBREG. */
33833 tmp1
= gen_reg_rtx (SImode
);
33834 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
33836 /* Insert the SImode value as low element of a V4SImode vector. */
33837 tmp2
= gen_lowpart (V4SImode
, dperm
.op0
);
33838 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
33840 ok
= (expand_vec_perm_1 (&dperm
)
33841 || expand_vec_perm_broadcast_1 (&dperm
));
33853 /* Replicate the value once into the next wider mode and recurse. */
33855 enum machine_mode smode
, wsmode
, wvmode
;
33858 smode
= GET_MODE_INNER (mode
);
33859 wvmode
= get_mode_wider_vector (mode
);
33860 wsmode
= GET_MODE_INNER (wvmode
);
33862 val
= convert_modes (wsmode
, smode
, val
, true);
33863 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
33864 GEN_INT (GET_MODE_BITSIZE (smode
)),
33865 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
33866 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
33868 x
= gen_lowpart (wvmode
, target
);
33869 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
33877 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
33878 rtx x
= gen_reg_rtx (hvmode
);
33880 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
33883 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
33884 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
33893 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33894 whose ONE_VAR element is VAR, and other elements are zero. Return true
33898 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
33899 rtx target
, rtx var
, int one_var
)
33901 enum machine_mode vsimode
;
33904 bool use_vector_set
= false;
33909 /* For SSE4.1, we normally use vector set. But if the second
33910 element is zero and inter-unit moves are OK, we use movq
33912 use_vector_set
= (TARGET_64BIT
33914 && !(TARGET_INTER_UNIT_MOVES
33920 use_vector_set
= TARGET_SSE4_1
;
33923 use_vector_set
= TARGET_SSE2
;
33926 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
33933 use_vector_set
= TARGET_AVX
;
33936 /* Use ix86_expand_vector_set in 64bit mode only. */
33937 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
33943 if (use_vector_set
)
33945 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
33946 var
= force_reg (GET_MODE_INNER (mode
), var
);
33947 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
33963 var
= force_reg (GET_MODE_INNER (mode
), var
);
33964 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
33965 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
33970 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
33971 new_target
= gen_reg_rtx (mode
);
33973 new_target
= target
;
33974 var
= force_reg (GET_MODE_INNER (mode
), var
);
33975 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
33976 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
33977 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
33980 /* We need to shuffle the value to the correct position, so
33981 create a new pseudo to store the intermediate result. */
33983 /* With SSE2, we can use the integer shuffle insns. */
33984 if (mode
!= V4SFmode
&& TARGET_SSE2
)
33986 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
33988 GEN_INT (one_var
== 1 ? 0 : 1),
33989 GEN_INT (one_var
== 2 ? 0 : 1),
33990 GEN_INT (one_var
== 3 ? 0 : 1)));
33991 if (target
!= new_target
)
33992 emit_move_insn (target
, new_target
);
33996 /* Otherwise convert the intermediate result to V4SFmode and
33997 use the SSE1 shuffle instructions. */
33998 if (mode
!= V4SFmode
)
34000 tmp
= gen_reg_rtx (V4SFmode
);
34001 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
34006 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
34008 GEN_INT (one_var
== 1 ? 0 : 1),
34009 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
34010 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
34012 if (mode
!= V4SFmode
)
34013 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
34014 else if (tmp
!= target
)
34015 emit_move_insn (target
, tmp
);
34017 else if (target
!= new_target
)
34018 emit_move_insn (target
, new_target
);
34023 vsimode
= V4SImode
;
34029 vsimode
= V2SImode
;
34035 /* Zero extend the variable element to SImode and recurse. */
34036 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
34038 x
= gen_reg_rtx (vsimode
);
34039 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
34041 gcc_unreachable ();
34043 emit_move_insn (target
, gen_lowpart (mode
, x
));
34051 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
34052 consisting of the values in VALS. It is known that all elements
34053 except ONE_VAR are constants. Return true if successful. */
34056 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
34057 rtx target
, rtx vals
, int one_var
)
34059 rtx var
= XVECEXP (vals
, 0, one_var
);
34060 enum machine_mode wmode
;
34063 const_vec
= copy_rtx (vals
);
34064 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
34065 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
34073 /* For the two element vectors, it's just as easy to use
34074 the general case. */
34078 /* Use ix86_expand_vector_set in 64bit mode only. */
34101 /* There's no way to set one QImode entry easily. Combine
34102 the variable value with its adjacent constant value, and
34103 promote to an HImode set. */
34104 x
= XVECEXP (vals
, 0, one_var
^ 1);
34107 var
= convert_modes (HImode
, QImode
, var
, true);
34108 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
34109 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
34110 x
= GEN_INT (INTVAL (x
) & 0xff);
34114 var
= convert_modes (HImode
, QImode
, var
, true);
34115 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
34117 if (x
!= const0_rtx
)
34118 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
34119 1, OPTAB_LIB_WIDEN
);
34121 x
= gen_reg_rtx (wmode
);
34122 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
34123 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
34125 emit_move_insn (target
, gen_lowpart (mode
, x
));
34132 emit_move_insn (target
, const_vec
);
34133 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
34137 /* A subroutine of ix86_expand_vector_init_general. Use vector
34138 concatenate to handle the most general case: all values variable,
34139 and none identical. */
34142 ix86_expand_vector_init_concat (enum machine_mode mode
,
34143 rtx target
, rtx
*ops
, int n
)
34145 enum machine_mode cmode
, hmode
= VOIDmode
;
34146 rtx first
[8], second
[4];
34186 gcc_unreachable ();
34189 if (!register_operand (ops
[1], cmode
))
34190 ops
[1] = force_reg (cmode
, ops
[1]);
34191 if (!register_operand (ops
[0], cmode
))
34192 ops
[0] = force_reg (cmode
, ops
[0]);
34193 emit_insn (gen_rtx_SET (VOIDmode
, target
,
34194 gen_rtx_VEC_CONCAT (mode
, ops
[0],
34214 gcc_unreachable ();
34230 gcc_unreachable ();
34235 /* FIXME: We process inputs backward to help RA. PR 36222. */
34238 for (; i
> 0; i
-= 2, j
--)
34240 first
[j
] = gen_reg_rtx (cmode
);
34241 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
34242 ix86_expand_vector_init (false, first
[j
],
34243 gen_rtx_PARALLEL (cmode
, v
));
34249 gcc_assert (hmode
!= VOIDmode
);
34250 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
34252 second
[j
] = gen_reg_rtx (hmode
);
34253 ix86_expand_vector_init_concat (hmode
, second
[j
],
34257 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
34260 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
34264 gcc_unreachable ();
34268 /* A subroutine of ix86_expand_vector_init_general. Use vector
34269 interleave to handle the most general case: all values variable,
34270 and none identical. */
34273 ix86_expand_vector_init_interleave (enum machine_mode mode
,
34274 rtx target
, rtx
*ops
, int n
)
34276 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
34279 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
34280 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
34281 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
34286 gen_load_even
= gen_vec_setv8hi
;
34287 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
34288 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
34289 inner_mode
= HImode
;
34290 first_imode
= V4SImode
;
34291 second_imode
= V2DImode
;
34292 third_imode
= VOIDmode
;
34295 gen_load_even
= gen_vec_setv16qi
;
34296 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
34297 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
34298 inner_mode
= QImode
;
34299 first_imode
= V8HImode
;
34300 second_imode
= V4SImode
;
34301 third_imode
= V2DImode
;
34304 gcc_unreachable ();
34307 for (i
= 0; i
< n
; i
++)
34309 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
34310 op0
= gen_reg_rtx (SImode
);
34311 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
34313 /* Insert the SImode value as low element of V4SImode vector. */
34314 op1
= gen_reg_rtx (V4SImode
);
34315 op0
= gen_rtx_VEC_MERGE (V4SImode
,
34316 gen_rtx_VEC_DUPLICATE (V4SImode
,
34318 CONST0_RTX (V4SImode
),
34320 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
      /* Cast the V4SImode vector back to a vector in original mode.  */
34323 op0
= gen_reg_rtx (mode
);
34324 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
      /* Load even elements into the second position.  */
34327 emit_insn (gen_load_even (op0
,
34328 force_reg (inner_mode
,
34332 /* Cast vector to FIRST_IMODE vector. */
34333 ops
[i
] = gen_reg_rtx (first_imode
);
34334 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
34337 /* Interleave low FIRST_IMODE vectors. */
34338   for (i = j = 0; i < n; i += 2, j++)
34340       op0 = gen_reg_rtx (first_imode);
34341       emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
34343       /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
34344       ops[j] = gen_reg_rtx (second_imode);
34345       emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
34348 /* Interleave low SECOND_IMODE vectors. */
34349   switch (second_imode)
34352       for (i = j = 0; i < n / 2; i += 2, j++)
34354           op0 = gen_reg_rtx (second_imode);
34355           emit_insn (gen_interleave_second_low (op0, ops[i],
34358           /* Cast the SECOND_IMODE vector to the THIRD_IMODE
34360           ops[j] = gen_reg_rtx (third_imode);
34361           emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
34363       second_imode = V2DImode;
34364       gen_interleave_second_low = gen_vec_interleave_lowv2di;
34368       op0 = gen_reg_rtx (second_imode);
34369       emit_insn (gen_interleave_second_low (op0, ops[0],
34372   /* Cast the SECOND_IMODE vector back to a vector on original
34374   emit_insn (gen_rtx_SET (VOIDmode, target,
34375                           gen_lowpart (mode, op0)));
34379 gcc_unreachable ();
34383 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
34384 all values variable, and none identical. */
34387 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
34388                                  rtx target, rtx vals)
34390   rtx ops[32], op0, op1;
34391   enum machine_mode half_mode = VOIDmode;
34398   if (!mmx_ok && !TARGET_SSE)
34410       n = GET_MODE_NUNITS (mode);
34411       for (i = 0; i < n; i++)
34412         ops[i] = XVECEXP (vals, 0, i);
34413       ix86_expand_vector_init_concat (mode, target, ops, n);
34417       half_mode = V16QImode;
34421       half_mode = V8HImode;
34425       n = GET_MODE_NUNITS (mode);
34426       for (i = 0; i < n; i++)
34427         ops[i] = XVECEXP (vals, 0, i);
34428       op0 = gen_reg_rtx (half_mode);
34429       op1 = gen_reg_rtx (half_mode);
34430       ix86_expand_vector_init_interleave (half_mode, op0, ops,
34432       ix86_expand_vector_init_interleave (half_mode, op1,
34433                                           &ops[n >> 1], n >> 2);
34434       emit_insn (gen_rtx_SET (VOIDmode, target,
34435                               gen_rtx_VEC_CONCAT (mode, op0, op1)));
34439       if (!TARGET_SSE4_1)
34447       /* Don't use ix86_expand_vector_init_interleave if we can't
34448          move from GPR to SSE register directly.  */
34449       if (!TARGET_INTER_UNIT_MOVES)
34452       n = GET_MODE_NUNITS (mode);
34453       for (i = 0; i < n; i++)
34454         ops[i] = XVECEXP (vals, 0, i);
34455       ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
34463 gcc_unreachable ();
34467       int i, j, n_elts, n_words, n_elt_per_word;
34468       enum machine_mode inner_mode;
34469       rtx words[4], shift;
34471       inner_mode = GET_MODE_INNER (mode);
34472       n_elts = GET_MODE_NUNITS (mode);
34473       n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
34474       n_elt_per_word = n_elts / n_words;
34475       shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
34477       for (i = 0; i < n_words; ++i)
34479           rtx word = NULL_RTX;
34481           for (j = 0; j < n_elt_per_word; ++j)
34483               rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
34484               elt = convert_modes (word_mode, inner_mode, elt, true);
34490               word = expand_simple_binop (word_mode, ASHIFT, word, shift,
34491                                           word, 1, OPTAB_LIB_WIDEN);
34492               word = expand_simple_binop (word_mode, IOR, word, elt,
34493                                           word, 1, OPTAB_LIB_WIDEN);
34501         emit_move_insn (target, gen_lowpart (mode, words[0]));
34502       else if (n_words == 2)
34504           rtx tmp = gen_reg_rtx (mode);
34505           emit_clobber (tmp);
34506           emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
34507           emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
34508           emit_move_insn (target, tmp);
34510       else if (n_words == 4)
34512           rtx tmp = gen_reg_rtx (V4SImode);
34513           gcc_assert (word_mode == SImode);
34514           vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
34515           ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
34516           emit_move_insn (target, gen_lowpart (mode, tmp));
34519 gcc_unreachable ();
34523 /* Initialize vector TARGET via VALS. Suppress the use of MMX
34524 instructions unless MMX_OK is true. */
34527 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
34529   enum machine_mode mode = GET_MODE (target);
34530   enum machine_mode inner_mode = GET_MODE_INNER (mode);
34531   int n_elts = GET_MODE_NUNITS (mode);
34532   int n_var = 0, one_var = -1;
34533   bool all_same = true, all_const_zero = true;
34537   for (i = 0; i < n_elts; ++i)
34539       x = XVECEXP (vals, 0, i);
34540       if (!(CONST_INT_P (x)
34541             || GET_CODE (x) == CONST_DOUBLE
34542             || GET_CODE (x) == CONST_FIXED))
34543         n_var++, one_var = i;
34544       else if (x != CONST0_RTX (inner_mode))
34545         all_const_zero = false;
34546       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
34550 /* Constants are best loaded from the constant pool. */
34553     emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
34557   /* If all values are identical, broadcast the value.  */
34559       && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
34560                                             XVECEXP (vals, 0, 0)))
34563   /* Values where only one field is non-constant are best loaded from
34564      the pool and overwritten via move later.  */
34568       && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
34569                                               XVECEXP (vals, 0, one_var),
34573       if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
34577   ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
34581 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
34583 enum machine_mode mode
= GET_MODE (target
);
34584 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
34585 enum machine_mode half_mode
;
34586 bool use_vec_merge
= false;
34588 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
34590 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
34591 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
34592 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
34593 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
34594 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
34595 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
34597 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
34599 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
34600 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
34601 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
34602 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
34603 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
34604 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
34614 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
34615 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
34617 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
34619 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
34620 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34626 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
34630 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
34631 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
34633 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
34635 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
34636 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34643 /* For the two element vectors, we implement a VEC_CONCAT with
34644 the extraction of the other element. */
34646 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
34647 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
34650 op0
= val
, op1
= tmp
;
34652 op0
= tmp
, op1
= val
;
34654 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
34655 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34660 use_vec_merge
= TARGET_SSE4_1
;
34667 use_vec_merge
= true;
34671 /* tmp = target = A B C D */
34672 tmp
= copy_to_reg (target
);
34673 /* target = A A B B */
34674 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
34675 /* target = X A B B */
34676 ix86_expand_vector_set (false, target
, val
, 0);
34677 /* target = A X C D */
34678 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
34679 const1_rtx
, const0_rtx
,
34680 GEN_INT (2+4), GEN_INT (3+4)));
34684 /* tmp = target = A B C D */
34685 tmp
= copy_to_reg (target
);
34686 /* tmp = X B C D */
34687 ix86_expand_vector_set (false, tmp
, val
, 0);
34688 /* target = A B X D */
34689 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
34690 const0_rtx
, const1_rtx
,
34691 GEN_INT (0+4), GEN_INT (3+4)));
34695 /* tmp = target = A B C D */
34696 tmp
= copy_to_reg (target
);
34697 /* tmp = X B C D */
34698 ix86_expand_vector_set (false, tmp
, val
, 0);
34699 /* target = A B C X */
34700 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
34701 const0_rtx
, const1_rtx
,
34702 GEN_INT (2+4), GEN_INT (0+4)));
34706 gcc_unreachable ();
34711 use_vec_merge
= TARGET_SSE4_1
;
34715 /* Element 0 handled by vec_merge below. */
34718 use_vec_merge
= true;
34724 /* With SSE2, use integer shuffles to swap element 0 and ELT,
34725 store into element 0, then shuffle them back. */
34729 order
[0] = GEN_INT (elt
);
34730 order
[1] = const1_rtx
;
34731 order
[2] = const2_rtx
;
34732 order
[3] = GEN_INT (3);
34733 order
[elt
] = const0_rtx
;
34735 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
34736 order
[1], order
[2], order
[3]));
34738 ix86_expand_vector_set (false, target
, val
, 0);
34740 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
34741 order
[1], order
[2], order
[3]));
34745 /* For SSE1, we have to reuse the V4SF code. */
34746 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
34747 gen_lowpart (SFmode
, val
), elt
);
34752 use_vec_merge
= TARGET_SSE2
;
34755 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
34759 use_vec_merge
= TARGET_SSE4_1
;
34766 half_mode
= V16QImode
;
34772 half_mode
= V8HImode
;
34778 half_mode
= V4SImode
;
34784 half_mode
= V2DImode
;
34790 half_mode
= V4SFmode
;
34796 half_mode
= V2DFmode
;
34802 /* Compute offset. */
34806 gcc_assert (i
<= 1);
34808 /* Extract the half. */
34809 tmp
= gen_reg_rtx (half_mode
);
34810 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
34812 /* Put val in tmp at elt. */
34813 ix86_expand_vector_set (false, tmp
, val
, elt
);
34816 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
34825 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
34826 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
34827 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34831 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
34833 emit_move_insn (mem
, target
);
34835 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
34836 emit_move_insn (tmp
, val
);
34838 emit_move_insn (target
, mem
);
34843 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
34845 enum machine_mode mode
= GET_MODE (vec
);
34846 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
34847 bool use_vec_extr
= false;
34860 use_vec_extr
= true;
34864 use_vec_extr
= TARGET_SSE4_1
;
34876 tmp
= gen_reg_rtx (mode
);
34877 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
34878 GEN_INT (elt
), GEN_INT (elt
),
34879 GEN_INT (elt
+4), GEN_INT (elt
+4)));
34883 tmp
= gen_reg_rtx (mode
);
34884 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
34888 gcc_unreachable ();
34891 use_vec_extr
= true;
34896 use_vec_extr
= TARGET_SSE4_1
;
34910 tmp
= gen_reg_rtx (mode
);
34911 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
34912 GEN_INT (elt
), GEN_INT (elt
),
34913 GEN_INT (elt
), GEN_INT (elt
)));
34917 tmp
= gen_reg_rtx (mode
);
34918 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
34922 gcc_unreachable ();
34925 use_vec_extr
= true;
34930 /* For SSE1, we have to reuse the V4SF code. */
34931 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
34932 gen_lowpart (V4SFmode
, vec
), elt
);
34938 use_vec_extr
= TARGET_SSE2
;
34941 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
34945 use_vec_extr
= TARGET_SSE4_1
;
34951 tmp
= gen_reg_rtx (V4SFmode
);
34953 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
34955 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
34956 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
34964 tmp
= gen_reg_rtx (V2DFmode
);
34966 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
34968 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
34969 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
34977 tmp
= gen_reg_rtx (V16QImode
);
34979 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
34981 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
34982 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
34990 tmp
= gen_reg_rtx (V8HImode
);
34992 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
34994 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
34995 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
35003 tmp
= gen_reg_rtx (V4SImode
);
35005 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
35007 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
35008 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
35016 tmp
= gen_reg_rtx (V2DImode
);
35018 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
35020 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
35021 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
35027 /* ??? Could extract the appropriate HImode element and shift. */
35034 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
35035 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
35037 /* Let the rtl optimizers know about the zero extension performed. */
35038 if (inner_mode
== QImode
|| inner_mode
== HImode
)
35040 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
35041 target
= gen_lowpart (SImode
, target
);
35044 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
35048 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
35050 emit_move_insn (mem
, vec
);
35052 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
35053 emit_move_insn (target
, tmp
);
35057 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
35058 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
35059 The upper bits of DEST are undefined, though they shouldn't cause
35060 exceptions (some bits from src or all zeros are ok). */
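/* Illustrative note (editorial): e.g. for a V4SFmode SRC = {a0,a1,a2,a3}
   and i == 128, movhlps copies the high 64 bits over the low half, so
   DEST starts with {a2,a3,...}; combining DEST with SRC element-wise then
   reduces four live values to two, and a second step with i == 64 brings
   the remaining count down to one.  */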
35063 emit_reduc_half (rtx dest
, rtx src
, int i
)
35066 switch (GET_MODE (src
))
35070 tem
= gen_sse_movhlps (dest
, src
, src
);
35072 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
35073 GEN_INT (1 + 4), GEN_INT (1 + 4));
35076 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
35082 tem
= gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, dest
),
35083 gen_lowpart (V1TImode
, src
),
35088 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
35090 tem
= gen_avx_shufps256 (dest
, src
, src
,
35091 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
35095 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
35097 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
35104 tem
= gen_avx2_permv2ti (gen_lowpart (V4DImode
, dest
),
35105 gen_lowpart (V4DImode
, src
),
35106 gen_lowpart (V4DImode
, src
),
35109 tem
= gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode
, dest
),
35110 gen_lowpart (V2TImode
, src
),
35114 gcc_unreachable ();
35119 /* Expand a vector reduction. FN is the binary pattern to reduce;
35120 DEST is the destination; IN is the input vector. */
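/* Editorial sketch (not part of the original source): the loop below is
   the usual log2(n) tree reduction.  Written as plain scalar C over an
   array of a power-of-two length it would look roughly like:

     int reduce (int (*fn) (int, int), int *v, int n)
     {
       for (int half = n / 2; half >= 1; half /= 2)
         for (int j = 0; j < half; j++)
           v[j] = fn (v[j], v[j + half]);   // combine with shifted half
       return v[0];
     }

   except that here each "shift by half" is performed on a whole vector
   register by emit_reduc_half.  */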
35123 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
35125 rtx half
, dst
, vec
= in
;
35126 enum machine_mode mode
= GET_MODE (in
);
35129 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
35131 && mode
== V8HImode
35132 && fn
== gen_uminv8hi3
)
35134 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
35138 for (i
= GET_MODE_BITSIZE (mode
);
35139 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
35142 half
= gen_reg_rtx (mode
);
35143 emit_reduc_half (half
, vec
, i
);
35144 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
35147 dst
= gen_reg_rtx (mode
);
35148 emit_insn (fn (dst
, half
, vec
));
35153 /* Target hook for scalar_mode_supported_p. */
35155 ix86_scalar_mode_supported_p (enum machine_mode mode)
35157   if (DECIMAL_FLOAT_MODE_P (mode))
35158     return default_decimal_float_supported_p ();
35159   else if (mode == TFmode)
35162   return default_scalar_mode_supported_p (mode);
35165 /* Implements target hook vector_mode_supported_p.  */
35167 ix86_vector_mode_supported_p (enum machine_mode mode)
35169   if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
35171   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
35173   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
35175   if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
35177   if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
35182 /* Target hook for c_mode_for_suffix. */
35183 static enum machine_mode
35184 ix86_c_mode_for_suffix (char suffix
)
35194 /* Worker function for TARGET_MD_ASM_CLOBBERS.
35196 We do this in the new i386 backend to maintain source compatibility
35197 with the old cc0-based compiler. */
35200 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
35201 tree inputs ATTRIBUTE_UNUSED
,
35204 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
35206 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
35211 /* Implements the target hook targetm.asm.encode_section_info. */
35213 static void ATTRIBUTE_UNUSED
35214 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
35216 default_encode_section_info (decl
, rtl
, first
);
35218 if (TREE_CODE (decl
) == VAR_DECL
35219 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
35220 && ix86_in_large_data_p (decl
))
35221 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
35224 /* Worker function for REVERSE_CONDITION. */
35227 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
35229   return (mode != CCFPmode && mode != CCFPUmode
35230           ? reverse_condition (code)
35231           : reverse_condition_maybe_unordered (code));
35234 /* Output code to perform an x87 FP register move, from OPERANDS[1]
35238 output_387_reg_move (rtx insn, rtx *operands)
35240   if (REG_P (operands[0]))
35242       if (REG_P (operands[1])
35243           && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
35245           if (REGNO (operands[0]) == FIRST_STACK_REG)
35246             return output_387_ffreep (operands, 0);
35247           return "fstp\t%y0";
35249       if (STACK_TOP_P (operands[0]))
35250         return "fld%Z1\t%y1";
35253   else if (MEM_P (operands[0]))
35255       gcc_assert (REG_P (operands[1]));
35256       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
35257         return "fstp%Z0\t%y0";
35260       /* There is no non-popping store to memory for XFmode.
35261          So if we need one, follow the store with a load.  */
35262       if (GET_MODE (operands[0]) == XFmode)
35263 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
35265 return "fst%Z0\t%y0";
35272 /* Output code to perform a conditional jump to LABEL, if C2 flag in
35273 FP status register is set. */
35276 ix86_emit_fp_unordered_jump (rtx label
)
35278 rtx reg
= gen_reg_rtx (HImode
);
35281 emit_insn (gen_x86_fnstsw_1 (reg
));
35283 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
35285 emit_insn (gen_x86_sahf_1 (reg
));
35287 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
35288 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
35292 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
35294 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
35295 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
35298 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
35299 gen_rtx_LABEL_REF (VOIDmode
, label
),
35301 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
35303 emit_jump_insn (temp
);
35304 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
35307 /* Output code to perform a log1p XFmode calculation. */
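/* Editorial note: the threshold constant 0.29289321881345247... used
   below is 1 - sqrt(2)/2.  The x87 fyl2xp1 instruction only guarantees
   full precision when |x| is below that bound, so log1p(x) is computed
   as ln(2) * log2(1 + x) via fyl2xp1 for small |x|, and via fyl2x on the
   explicitly formed value 1 + x otherwise.  */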
35309 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
35311 rtx label1
= gen_label_rtx ();
35312 rtx label2
= gen_label_rtx ();
35314 rtx tmp
= gen_reg_rtx (XFmode
);
35315 rtx tmp2
= gen_reg_rtx (XFmode
);
35318 emit_insn (gen_absxf2 (tmp
, op1
));
35319 test
= gen_rtx_GE (VOIDmode
, tmp
,
35320 CONST_DOUBLE_FROM_REAL_VALUE (
35321 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
35323 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
35325 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
35326 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
35327 emit_jump (label2
);
35329 emit_label (label1
);
35330 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
35331 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
35332 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
35333 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
35335 emit_label (label2
);
35338 /* Emit code for round calculation. */
35339 void ix86_emit_i387_round (rtx op0
, rtx op1
)
35341 enum machine_mode inmode
= GET_MODE (op1
);
35342 enum machine_mode outmode
= GET_MODE (op0
);
35343 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
35344 rtx scratch
= gen_reg_rtx (HImode
);
35345 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
35346 rtx jump_label
= gen_label_rtx ();
35348 rtx (*gen_abs
) (rtx
, rtx
);
35349 rtx (*gen_neg
) (rtx
, rtx
);
35354 gen_abs
= gen_abssf2
;
35357 gen_abs
= gen_absdf2
;
35360 gen_abs
= gen_absxf2
;
35363 gcc_unreachable ();
35369 gen_neg
= gen_negsf2
;
35372 gen_neg
= gen_negdf2
;
35375 gen_neg
= gen_negxf2
;
35378 gen_neg
= gen_neghi2
;
35381 gen_neg
= gen_negsi2
;
35384 gen_neg
= gen_negdi2
;
35387 gcc_unreachable ();
35390 e1
= gen_reg_rtx (inmode
);
35391 e2
= gen_reg_rtx (inmode
);
35392 res
= gen_reg_rtx (outmode
);
35394 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
35396 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
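/* Editorial worked example: for op1 = -2.3 the sequence below computes
   e1 = fabs(-2.3) = 2.3, e2 = 2.3 + 0.5 = 2.8, floor(2.8) = 2, and the
   fxam sign bit then triggers the negation, giving round(-2.3) = -2.
   Ties round away from zero: fabs(2.5) + 0.5 = 3.0, floor -> 3.  */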
35398 /* scratch = fxam(op1) */
35399 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
35400 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
35402 /* e1 = fabs(op1) */
35403 emit_insn (gen_abs (e1
, op1
));
35405 /* e2 = e1 + 0.5 */
35406 half
= force_reg (inmode
, half
);
35407 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
35408 gen_rtx_PLUS (inmode
, e1
, half
)));
35410 /* res = floor(e2) */
35411 if (inmode
!= XFmode
)
35413 tmp1
= gen_reg_rtx (XFmode
);
35415 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
35416 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
35426 rtx tmp0
= gen_reg_rtx (XFmode
);
35428 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
35430 emit_insn (gen_rtx_SET (VOIDmode
, res
,
35431 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
35432 UNSPEC_TRUNC_NOOP
)));
35436 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
35439 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
35442 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
35445 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
35448 gcc_unreachable ();
35451 /* flags = signbit(a) */
35452 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
35454 /* if (flags) then res = -res */
35455 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
35456 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
35457 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
35459 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
35460 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
35461 JUMP_LABEL (insn
) = jump_label
;
35463 emit_insn (gen_neg (res
, res
));
35465 emit_label (jump_label
);
35466 LABEL_NUSES (jump_label
) = 1;
35468 emit_move_insn (op0
, res
);
35471 /* Output code to perform a Newton-Raphson approximation of a single precision
35472    floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
35474 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
35476   rtx x0, x1, e0, e1;
35478   x0 = gen_reg_rtx (mode);
35479   e0 = gen_reg_rtx (mode);
35480   e1 = gen_reg_rtx (mode);
35481   x1 = gen_reg_rtx (mode);
35483 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
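/* Editorial sketch (not part of the original source): the same refinement
   written as plain scalar C, with rcp() standing for the low-precision
   RCPSS hardware estimate:

     float swdiv (float a, float b)
     {
       float x0 = rcp (b);            // initial reciprocal estimate
       float e0 = (x0 * b) * x0;      // b * rcp(b) * rcp(b)
       float x1 = (x0 + x0) - e0;     // one Newton-Raphson step
       return a * x1;                 // a / b, with roughly twice the
     }                                // bits of accuracy of the estimate
*/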
35485   b = force_reg (mode, b);
35487   /* x0 = rcp(b) estimate */
35488   emit_insn (gen_rtx_SET (VOIDmode, x0,
35489                           gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
35492   emit_insn (gen_rtx_SET (VOIDmode, e0,
35493                           gen_rtx_MULT (mode, x0, b)));
35496   emit_insn (gen_rtx_SET (VOIDmode, e0,
35497                           gen_rtx_MULT (mode, x0, e0)));
35500   emit_insn (gen_rtx_SET (VOIDmode, e1,
35501                           gen_rtx_PLUS (mode, x0, x0)));
35504   emit_insn (gen_rtx_SET (VOIDmode, x1,
35505                           gen_rtx_MINUS (mode, e1, e0)));
35508   emit_insn (gen_rtx_SET (VOIDmode, res,
35509                           gen_rtx_MULT (mode, a, x1)));
35512 /* Output code to perform a Newton-Raphson approximation of a
35513    single precision floating point [reciprocal] square root. */
35515 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
35518 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
35521 x0
= gen_reg_rtx (mode
);
35522 e0
= gen_reg_rtx (mode
);
35523 e1
= gen_reg_rtx (mode
);
35524 e2
= gen_reg_rtx (mode
);
35525 e3
= gen_reg_rtx (mode
);
35527 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
35528 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
35530 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
35531 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
35533 if (VECTOR_MODE_P (mode
))
35535 mthree
= ix86_build_const_vector (mode
, true, mthree
);
35536 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
35539 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
35540 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
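/* Editorial sketch (not part of the original source): in plain scalar C,
   with rsqrt() standing for the RSQRTSS hardware estimate, the sequence
   below amounts to

     float x0 = rsqrt (a);
     float e0 = x0 * a;                    // a * rsqrt(a)
     float e1 = e0 * x0;                   // a * rsqrt(a) * rsqrt(a)
     float e2 = e1 - 3.0f;                 // e1 + mthree
     float e3 = (recip ? x0 : e0) * -0.5f; // -.5 * x0 or -.5 * e0
     result   = e2 * e3;                   // refined rsqrt(a) or sqrt(a)

   i.e. one Newton-Raphson step applied to the hardware estimate.  */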
35542 a
= force_reg (mode
, a
);
35544 /* x0 = rsqrt(a) estimate */
35545 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
35546 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
35549 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
35554 zero
= gen_reg_rtx (mode
);
35555 mask
= gen_reg_rtx (mode
);
35557 zero
= force_reg (mode
, CONST0_RTX(mode
));
35558 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
35559 gen_rtx_NE (mode
, zero
, a
)));
35561 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
35562 gen_rtx_AND (mode
, x0
, mask
)));
35566 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
35567 gen_rtx_MULT (mode
, x0
, a
)));
35569 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
35570 gen_rtx_MULT (mode
, e0
, x0
)));
35573 mthree
= force_reg (mode
, mthree
);
35574 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
35575 gen_rtx_PLUS (mode
, e1
, mthree
)));
35577 mhalf
= force_reg (mode
, mhalf
);
35579 /* e3 = -.5 * x0 */
35580 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
35581 gen_rtx_MULT (mode
, x0
, mhalf
)));
35583 /* e3 = -.5 * e0 */
35584 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
35585 gen_rtx_MULT (mode
, e0
, mhalf
)));
35586 /* ret = e2 * e3 */
35587 emit_insn (gen_rtx_SET (VOIDmode
, res
,
35588 gen_rtx_MULT (mode
, e2
, e3
)));
35591 #ifdef TARGET_SOLARIS
35592 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
35595 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
35598 /* With Binutils 2.15, the "@unwind" marker must be specified on
35599 every occurrence of the ".eh_frame" section, not just the first
35602 && strcmp (name
, ".eh_frame") == 0)
35604 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
35605 flags
& SECTION_WRITE
? "aw" : "a");
35610 if (HAVE_COMDAT_GROUP
&& flags
& SECTION_LINKONCE
)
35612 solaris_elf_asm_comdat_section (name
, flags
, decl
);
35617 default_elf_asm_named_section (name
, flags
, decl
);
35619 #endif /* TARGET_SOLARIS */
35621 /* Return the mangling of TYPE if it is an extended fundamental type. */
35623 static const char *
35624 ix86_mangle_type (const_tree type
)
35626 type
= TYPE_MAIN_VARIANT (type
);
35628 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
35629 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
35632 switch (TYPE_MODE (type
))
35635 /* __float128 is "g". */
35638 /* "long double" or __float80 is "e". */
35645 /* For 32-bit code we can save PIC register setup by using
35646 __stack_chk_fail_local hidden function instead of calling
35647 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
35648 register, so it is better to call __stack_chk_fail directly. */
35650 static tree ATTRIBUTE_UNUSED
35651 ix86_stack_protect_fail (void)
35653 return TARGET_64BIT
35654 ? default_external_stack_protect_fail ()
35655 : default_hidden_stack_protect_fail ();
35658 /* Select a format to encode pointers in exception handling data. CODE
35659 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
35660 true if the symbol may be affected by dynamic relocations.
35662 ??? All x86 object file formats are capable of representing this.
35663 After all, the relocation needed is the same as for the call insn.
35664 Whether or not a particular assembler allows us to enter such, I
35665 guess we'll have to see. */
35667 asm_preferred_eh_data_format (int code
, int global
)
35671 int type
= DW_EH_PE_sdata8
;
35673 || ix86_cmodel
== CM_SMALL_PIC
35674 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
35675 type
= DW_EH_PE_sdata4
;
35676 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
35678 if (ix86_cmodel
== CM_SMALL
35679 || (ix86_cmodel
== CM_MEDIUM
&& code
))
35680 return DW_EH_PE_udata4
;
35681 return DW_EH_PE_absptr
;
35684 /* Expand copysign from SIGN to the positive value ABS_VALUE
35685 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
35688 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
35690 enum machine_mode mode
= GET_MODE (sign
);
35691 rtx sgn
= gen_reg_rtx (mode
);
35692 if (mask
== NULL_RTX
)
35694 enum machine_mode vmode
;
35696 if (mode
== SFmode
)
35698 else if (mode
== DFmode
)
35703 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
35704 if (!VECTOR_MODE_P (mode
))
35706 /* We need to generate a scalar mode mask in this case. */
35707 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
35708 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
35709 mask
= gen_reg_rtx (mode
);
35710 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
35714 mask
= gen_rtx_NOT (mode
, mask
);
35715 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
35716 gen_rtx_AND (mode
, mask
, sign
)));
35717 emit_insn (gen_rtx_SET (VOIDmode
, result
,
35718 gen_rtx_IOR (mode
, abs_value
, sgn
)));
35721 /* Expand fabs (OP0) and return a new rtx that holds the result. The
35722 mask for masking out the sign-bit is stored in *SMASK, if that is
35725 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
35727 enum machine_mode vmode
, mode
= GET_MODE (op0
);
35730 xa
= gen_reg_rtx (mode
);
35731 if (mode
== SFmode
)
35733 else if (mode
== DFmode
)
35737 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
35738 if (!VECTOR_MODE_P (mode
))
35740 /* We need to generate a scalar mode mask in this case. */
35741 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
35742 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
35743 mask
= gen_reg_rtx (mode
);
35744 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
35746 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
35747 gen_rtx_AND (mode
, op0
, mask
)));
35755 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
35756 swapping the operands if SWAP_OPERANDS is true. The expanded
35757 code is a forward jump to a newly created label in case the
35758 comparison is true. The generated label rtx is returned. */
35760 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
35761 bool swap_operands
)
35772 label
= gen_label_rtx ();
35773 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
35774 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
35775 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
35776 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
35777 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
35778 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
35779 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
35780 JUMP_LABEL (tmp
) = label
;
35785 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
35786 using comparison code CODE. Operands are swapped for the comparison if
35787 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
35789 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
35790 bool swap_operands
)
35792 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
35793 enum machine_mode mode
= GET_MODE (op0
);
35794 rtx mask
= gen_reg_rtx (mode
);
35803 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
35805 emit_insn (insn (mask
, op0
, op1
,
35806 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
35810 /* Generate and return a rtx of mode MODE for 2**n where n is the number
35811 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
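/* Editorial note: adding and then subtracting this constant is the usual
   trick for rounding to integer in the current rounding mode: for any
   double x with 0 <= x < 2**52, x + 2**52 has no fraction bits left, so
   the addition itself rounds, and subtracting 2**52 recovers the rounded
   integer.  E.g. 3.7 + 2**52 rounds to 4503599627370500.0 (2**52 + 4),
   and subtracting 2**52 yields 4.0.  */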
35813 ix86_gen_TWO52 (enum machine_mode mode)
35815   REAL_VALUE_TYPE TWO52r;
35818   real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
35819   TWO52 = const_double_from_real_value (TWO52r, mode);
35820   TWO52 = force_reg (mode, TWO52);
35825 /* Expand SSE sequence for computing lround from OP1 storing
35828 ix86_expand_lround (rtx op0
, rtx op1
)
35830 /* C code for the stuff we're doing below:
35831 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
35834 enum machine_mode mode
= GET_MODE (op1
);
35835 const struct real_format
*fmt
;
35836 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
35839 /* load nextafter (0.5, 0.0) */
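/* Editorial note: nextafter (0.5, 0.0), the largest double below 0.5, is
   used instead of 0.5 so that inputs just below an integer boundary are
   not rounded up twice.  With a plain 0.5, the largest double below 0.5
   (0.49999999999999994) would give 0.49999999999999994 + 0.5 == 1.0 after
   rounding and hence lround == 1, whereas the correct result is 0.  */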
35840 fmt
= REAL_MODE_FORMAT (mode
);
35841 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
35842 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
35844 /* adj = copysign (0.5, op1) */
35845 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
35846 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
35848 /* adj = op1 + adj */
35849 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
35851 /* op0 = (imode)adj */
35852 expand_fix (op0
, adj
, 0);
35855 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
35858 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
35860 /* C code for the stuff we're doing below (for do_floor):
35862 xi -= (double)xi > op1 ? 1 : 0;
35865 enum machine_mode fmode
= GET_MODE (op1
);
35866 enum machine_mode imode
= GET_MODE (op0
);
35867 rtx ireg
, freg
, label
, tmp
;
35869 /* reg = (long)op1 */
35870 ireg
= gen_reg_rtx (imode
);
35871 expand_fix (ireg
, op1
, 0);
35873 /* freg = (double)reg */
35874 freg
= gen_reg_rtx (fmode
);
35875 expand_float (freg
, ireg
, 0);
35877 /* ireg = (freg > op1) ? ireg - 1 : ireg */
35878 label
= ix86_expand_sse_compare_and_jump (UNLE
,
35879 freg
, op1
, !do_floor
);
35880 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
35881 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
35882 emit_move_insn (ireg
, tmp
);
35884 emit_label (label
);
35885 LABEL_NUSES (label
) = 1;
35887 emit_move_insn (op0
, ireg
);
35890 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
35891 result in OPERAND0. */
35893 ix86_expand_rint (rtx operand0
, rtx operand1
)
35895 /* C code for the stuff we're doing below:
35896 xa = fabs (operand1);
35897 if (!isless (xa, 2**52))
35899 xa = xa + 2**52 - 2**52;
35900 return copysign (xa, operand1);
35902 enum machine_mode mode
= GET_MODE (operand0
);
35903 rtx res
, xa
, label
, TWO52
, mask
;
35905 res
= gen_reg_rtx (mode
);
35906 emit_move_insn (res
, operand1
);
35908 /* xa = abs (operand1) */
35909 xa
= ix86_expand_sse_fabs (res
, &mask
);
35911 /* if (!isless (xa, TWO52)) goto label; */
35912 TWO52
= ix86_gen_TWO52 (mode
);
35913 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
35915 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
35916 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
35918 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
35920 emit_label (label
);
35921 LABEL_NUSES (label
) = 1;
35923 emit_move_insn (operand0
, res
);
35926 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
35929 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
35931 /* C code for the stuff we expand below.
35932 double xa = fabs (x), x2;
35933 if (!isless (xa, TWO52))
35935 xa = xa + TWO52 - TWO52;
35936 x2 = copysign (xa, x);
35945 enum machine_mode mode
= GET_MODE (operand0
);
35946 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
35948 TWO52
= ix86_gen_TWO52 (mode
);
35950 /* Temporary for holding the result, initialized to the input
35951 operand to ease control flow. */
35952 res
= gen_reg_rtx (mode
);
35953 emit_move_insn (res
, operand1
);
35955 /* xa = abs (operand1) */
35956 xa
= ix86_expand_sse_fabs (res
, &mask
);
35958 /* if (!isless (xa, TWO52)) goto label; */
35959 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
35961 /* xa = xa + TWO52 - TWO52; */
35962 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
35963 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
35965 /* xa = copysign (xa, operand1) */
35966 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
35968 /* generate 1.0 or -1.0 */
35969 one
= force_reg (mode
,
35970 const_double_from_real_value (do_floor
35971 ? dconst1
: dconstm1
, mode
));
35973 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
35974 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
35975 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
35976 gen_rtx_AND (mode
, one
, tmp
)));
35977 /* We always need to subtract here to preserve signed zero. */
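/* Editorial worked example (floor case): for operand1 = -1.25, xa = 1.25
   becomes 1.0 via the TWO52 trick, copysign gives -1.0, and since
   -1.0 > -1.25 the masked 1.0 is subtracted, producing floor(-1.25) = -2.0.
   For operand1 = -0.0 the compare is false and -0.0 - 0.0 == -0.0, so the
   signed zero survives; this is why a subtraction is used rather than an
   addition of the negated mask.  */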
35978 tmp
= expand_simple_binop (mode
, MINUS
,
35979 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
35980 emit_move_insn (res
, tmp
);
35982 emit_label (label
);
35983 LABEL_NUSES (label
) = 1;
35985 emit_move_insn (operand0
, res
);
35988 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
35991 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
35993 /* C code for the stuff we expand below.
35994 double xa = fabs (x), x2;
35995 if (!isless (xa, TWO52))
35997 x2 = (double)(long)x;
36004 if (HONOR_SIGNED_ZEROS (mode))
36005 return copysign (x2, x);
36008 enum machine_mode mode
= GET_MODE (operand0
);
36009 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
36011 TWO52
= ix86_gen_TWO52 (mode
);
36013 /* Temporary for holding the result, initialized to the input
36014 operand to ease control flow. */
36015 res
= gen_reg_rtx (mode
);
36016 emit_move_insn (res
, operand1
);
36018 /* xa = abs (operand1) */
36019 xa
= ix86_expand_sse_fabs (res
, &mask
);
36021 /* if (!isless (xa, TWO52)) goto label; */
36022 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36024 /* xa = (double)(long)x */
36025 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
36026 expand_fix (xi
, res
, 0);
36027 expand_float (xa
, xi
, 0);
36030 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
36032 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
36033 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
36034 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
36035 gen_rtx_AND (mode
, one
, tmp
)));
36036 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
36037 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
36038 emit_move_insn (res
, tmp
);
36040 if (HONOR_SIGNED_ZEROS (mode
))
36041 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
36043 emit_label (label
);
36044 LABEL_NUSES (label
) = 1;
36046 emit_move_insn (operand0
, res
);
36049 /* Expand SSE sequence for computing round from OPERAND1 storing
36050 into OPERAND0. Sequence that works without relying on DImode truncation
36051 via cvttsd2siq that is only available on 64bit targets. */
36053 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
36055 /* C code for the stuff we expand below.
36056 double xa = fabs (x), xa2, x2;
36057 if (!isless (xa, TWO52))
36059 Using the absolute value and copying back sign makes
36060 -0.0 -> -0.0 correct.
36061 xa2 = xa + TWO52 - TWO52;
36066 else if (dxa > 0.5)
36068 x2 = copysign (xa2, x);
36071 enum machine_mode mode
= GET_MODE (operand0
);
36072 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
36074 TWO52
= ix86_gen_TWO52 (mode
);
36076 /* Temporary for holding the result, initialized to the input
36077 operand to ease control flow. */
36078 res
= gen_reg_rtx (mode
);
36079 emit_move_insn (res
, operand1
);
36081 /* xa = abs (operand1) */
36082 xa
= ix86_expand_sse_fabs (res
, &mask
);
36084 /* if (!isless (xa, TWO52)) goto label; */
36085 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36087 /* xa2 = xa + TWO52 - TWO52; */
36088 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
36089 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
36091 /* dxa = xa2 - xa; */
36092 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
36094 /* generate 0.5, 1.0 and -0.5 */
36095 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
36096 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
36097 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
36101 tmp
= gen_reg_rtx (mode
);
36102 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
36103 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
36104 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
36105 gen_rtx_AND (mode
, one
, tmp
)));
36106 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
36107 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
36108 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
36109 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
36110 gen_rtx_AND (mode
, one
, tmp
)));
36111 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
36113 /* res = copysign (xa2, operand1) */
36114 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
36116 emit_label (label
);
36117 LABEL_NUSES (label
) = 1;
36119 emit_move_insn (operand0
, res
);
36122 /* Expand SSE sequence for computing trunc from OPERAND1 storing
36125 ix86_expand_trunc (rtx operand0
, rtx operand1
)
36127 /* C code for SSE variant we expand below.
36128 double xa = fabs (x), x2;
36129 if (!isless (xa, TWO52))
36131 x2 = (double)(long)x;
36132 if (HONOR_SIGNED_ZEROS (mode))
36133 return copysign (x2, x);
36136 enum machine_mode mode
= GET_MODE (operand0
);
36137 rtx xa
, xi
, TWO52
, label
, res
, mask
;
36139 TWO52
= ix86_gen_TWO52 (mode
);
36141 /* Temporary for holding the result, initialized to the input
36142 operand to ease control flow. */
36143 res
= gen_reg_rtx (mode
);
36144 emit_move_insn (res
, operand1
);
36146 /* xa = abs (operand1) */
36147 xa
= ix86_expand_sse_fabs (res
, &mask
);
36149 /* if (!isless (xa, TWO52)) goto label; */
36150 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36152 /* x = (double)(long)x */
36153 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
36154 expand_fix (xi
, res
, 0);
36155 expand_float (res
, xi
, 0);
36157 if (HONOR_SIGNED_ZEROS (mode
))
36158 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
36160 emit_label (label
);
36161 LABEL_NUSES (label
) = 1;
36163 emit_move_insn (operand0
, res
);
36166 /* Expand SSE sequence for computing trunc from OPERAND1 storing
36169 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
36171 enum machine_mode mode
= GET_MODE (operand0
);
36172 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
36174 /* C code for SSE variant we expand below.
36175 double xa = fabs (x), x2;
36176 if (!isless (xa, TWO52))
36178 xa2 = xa + TWO52 - TWO52;
36182 x2 = copysign (xa2, x);
36186 TWO52
= ix86_gen_TWO52 (mode
);
36188 /* Temporary for holding the result, initialized to the input
36189 operand to ease control flow. */
36190 res
= gen_reg_rtx (mode
);
36191 emit_move_insn (res
, operand1
);
36193 /* xa = abs (operand1) */
36194 xa
= ix86_expand_sse_fabs (res
, &smask
);
36196 /* if (!isless (xa, TWO52)) goto label; */
36197 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36199 /* res = xa + TWO52 - TWO52; */
36200 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
36201 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
36202 emit_move_insn (res
, tmp
);
36205 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
36207 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
36208 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
36209 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
36210 gen_rtx_AND (mode
, mask
, one
)));
36211 tmp
= expand_simple_binop (mode
, MINUS
,
36212 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
36213 emit_move_insn (res
, tmp
);
36215 /* res = copysign (res, operand1) */
36216 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
36218 emit_label (label
);
36219 LABEL_NUSES (label
) = 1;
36221 emit_move_insn (operand0
, res
);
36224 /* Expand SSE sequence for computing round from OPERAND1 storing
36227 ix86_expand_round (rtx operand0
, rtx operand1
)
36229 /* C code for the stuff we're doing below:
36230 double xa = fabs (x);
36231 if (!isless (xa, TWO52))
36233 xa = (double)(long)(xa + nextafter (0.5, 0.0));
36234 return copysign (xa, x);
36236 enum machine_mode mode
= GET_MODE (operand0
);
36237 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
36238 const struct real_format
*fmt
;
36239 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
36241 /* Temporary for holding the result, initialized to the input
36242 operand to ease control flow. */
36243 res
= gen_reg_rtx (mode
);
36244 emit_move_insn (res
, operand1
);
36246 TWO52
= ix86_gen_TWO52 (mode
);
36247 xa
= ix86_expand_sse_fabs (res
, &mask
);
36248 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36250 /* load nextafter (0.5, 0.0) */
36251 fmt
= REAL_MODE_FORMAT (mode
);
36252 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
36253 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
36255 /* xa = xa + 0.5 */
36256 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
36257 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
36259 /* xa = (double)(int64_t)xa */
36260 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
36261 expand_fix (xi
, xa
, 0);
36262 expand_float (xa
, xi
, 0);
36264 /* res = copysign (xa, operand1) */
36265 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
36267 emit_label (label
);
36268 LABEL_NUSES (label
) = 1;
36270 emit_move_insn (operand0
, res
);
36273 /* Expand SSE sequence for computing round
36274 from OP1 storing into OP0 using sse4 round insn. */
36276 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
36278 enum machine_mode mode
= GET_MODE (op0
);
36279 rtx e1
, e2
, res
, half
;
36280 const struct real_format
*fmt
;
36281 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
36282 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
36283 rtx (*gen_round
) (rtx
, rtx
, rtx
);
36288 gen_copysign
= gen_copysignsf3
;
36289 gen_round
= gen_sse4_1_roundsf2
;
36292 gen_copysign
= gen_copysigndf3
;
36293 gen_round
= gen_sse4_1_rounddf2
;
36296 gcc_unreachable ();
36299 /* round (a) = trunc (a + copysign (0.5, a)) */
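/* Editorial worked example: for op1 = -2.3 this computes
   e1 = copysign (nextafter (0.5, 0.0), -2.3) ~= -0.5, e2 ~= -2.8, and the
   truncating round gives round(-2.3) = -2.  For the tie op1 = 2.5,
   e2 = 2.5 + 0.49999999999999994 still rounds up to 3.0 in the addition,
   so truncation yields 3, i.e. halves round away from zero as round()
   requires.  */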
36301 /* load nextafter (0.5, 0.0) */
36302 fmt
= REAL_MODE_FORMAT (mode
);
36303 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
36304 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
36305 half
= const_double_from_real_value (pred_half
, mode
);
36307 /* e1 = copysign (0.5, op1) */
36308 e1
= gen_reg_rtx (mode
);
36309 emit_insn (gen_copysign (e1
, half
, op1
));
36311 /* e2 = op1 + e1 */
36312 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
36314 /* res = trunc (e2) */
36315 res
= gen_reg_rtx (mode
);
36316 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
36318 emit_move_insn (op0
, res
);
36322 /* Table of valid machine attributes. */
36323 static const struct attribute_spec ix86_attribute_table
[] =
36325 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
36326 affects_type_identity } */
36327 /* Stdcall attribute says callee is responsible for popping arguments
36328 if they are not variable. */
36329 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
36331 /* Fastcall attribute says callee is responsible for popping arguments
36332 if they are not variable. */
36333 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
36335 /* Thiscall attribute says callee is responsible for popping arguments
36336 if they are not variable. */
36337 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
36339 /* Cdecl attribute says the callee is a normal C declaration */
36340 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
36342 /* Regparm attribute specifies how many integer arguments are to be
36343 passed in registers. */
36344 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
36346 /* Sseregparm attribute says we are using x86_64 calling conventions
36347 for FP arguments. */
36348 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
36350 /* The transactional memory builtins are implicitly regparm or fastcall
36351 depending on the ABI. Override the generic do-nothing attribute that
36352 these builtins were declared with. */
36353 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute
,
36355 /* force_align_arg_pointer says this function realigns the stack at entry. */
36356 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
36357 false, true, true, ix86_handle_cconv_attribute
, false },
36358 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
36359 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
36360 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
36361 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
36364 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
36366 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
36368 #ifdef SUBTARGET_ATTRIBUTE_TABLE
36369 SUBTARGET_ATTRIBUTE_TABLE
,
36371 /* ms_abi and sysv_abi calling convention function attributes. */
36372 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
36373 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
36374 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
36376 { "callee_pop_aggregate_return", 1, 1, false, true, true,
36377 ix86_handle_callee_pop_aggregate_return
, true },
36379 { NULL
, 0, 0, false, false, false, NULL
, false }
36382 /* Implement targetm.vectorize.builtin_vectorization_cost. */
36384 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
36386 int misalign ATTRIBUTE_UNUSED
)
36390 switch (type_of_cost
)
36393 return ix86_cost
->scalar_stmt_cost
;
36396 return ix86_cost
->scalar_load_cost
;
36399 return ix86_cost
->scalar_store_cost
;
36402 return ix86_cost
->vec_stmt_cost
;
36405 return ix86_cost
->vec_align_load_cost
;
36408 return ix86_cost
->vec_store_cost
;
36410 case vec_to_scalar
:
36411 return ix86_cost
->vec_to_scalar_cost
;
36413 case scalar_to_vec
:
36414 return ix86_cost
->scalar_to_vec_cost
;
36416 case unaligned_load
:
36417 case unaligned_store
:
36418 return ix86_cost
->vec_unalign_load_cost
;
36420 case cond_branch_taken
:
36421 return ix86_cost
->cond_taken_branch_cost
;
36423 case cond_branch_not_taken
:
36424 return ix86_cost
->cond_not_taken_branch_cost
;
36427 case vec_promote_demote
:
36428 return ix86_cost
->vec_stmt_cost
;
36430 case vec_construct
:
36431 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
36432 return elements
/ 2 + 1;
36435 gcc_unreachable ();
36439 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
36440 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
36441 insn every time. */
36443 static GTY(()) rtx vselect_insn
;
36445 /* Initialize vselect_insn. */
36448 init_vselect_insn (void)
36453 x
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (MAX_VECT_LEN
));
36454 for (i
= 0; i
< MAX_VECT_LEN
; ++i
)
36455 XVECEXP (x
, 0, i
) = const0_rtx
;
36456 x
= gen_rtx_VEC_SELECT (V2DFmode
, gen_rtx_VEC_CONCAT (V4DFmode
, const0_rtx
,
36458 x
= gen_rtx_SET (VOIDmode
, const0_rtx
, x
);
36460 vselect_insn
= emit_insn (x
);
36464 /* Construct (set target (vec_select op0 (parallel perm))) and
36465 return true if that's a valid instruction in the active ISA. */
36468 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
,
36469 unsigned nelt
, bool testing_p
)
36472 rtx x
, save_vconcat
;
36475 if (vselect_insn
== NULL_RTX
)
36476 init_vselect_insn ();
36478 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 1);
36479 PUT_NUM_ELEM (XVEC (x
, 0), nelt
);
36480 for (i
= 0; i
< nelt
; ++i
)
36481 XVECEXP (x
, 0, i
) = GEN_INT (perm
[i
]);
36482 save_vconcat
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
36483 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = op0
;
36484 PUT_MODE (SET_SRC (PATTERN (vselect_insn
)), GET_MODE (target
));
36485 SET_DEST (PATTERN (vselect_insn
)) = target
;
36486 icode
= recog_memoized (vselect_insn
);
36488 if (icode
>= 0 && !testing_p
)
36489 emit_insn (copy_rtx (PATTERN (vselect_insn
)));
36491 SET_DEST (PATTERN (vselect_insn
)) = const0_rtx
;
36492 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = save_vconcat
;
36493 INSN_CODE (vselect_insn
) = -1;
36498 /* Similar, but generate a vec_concat from op0 and op1 as well. */
36501 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
36502 const unsigned char *perm
, unsigned nelt
,
36505 enum machine_mode v2mode
;
36509 if (vselect_insn
== NULL_RTX
)
36510 init_vselect_insn ();
36512 v2mode
= GET_MODE_2XWIDER_MODE (GET_MODE (op0
));
36513 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
36514 PUT_MODE (x
, v2mode
);
36517 ok
= expand_vselect (target
, x
, perm
, nelt
, testing_p
);
36518 XEXP (x
, 0) = const0_rtx
;
36519 XEXP (x
, 1) = const0_rtx
;
36523 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36524 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
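/* Editorial example: for a V4SFmode permutation { 0, 5, 2, 7 }, elements 1
   and 3 are taken from the second operand, so the blendps immediate built
   below is 0b1010 (decimal 10).  Element positions themselves never
   change, which is why a permutation that moves an element across lanes
   or positions cannot use this path.  */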
36527 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
36529 enum machine_mode vmode
= d
->vmode
;
36530 unsigned i
, mask
, nelt
= d
->nelt
;
36531 rtx target
, op0
, op1
, x
;
36532 rtx rperm
[32], vperm
;
36534 if (d
->one_operand_p
)
36536 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
36538 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
36540 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
36545 /* This is a blend, not a permute. Elements must stay in their
36546 respective lanes. */
36547 for (i
= 0; i
< nelt
; ++i
)
36549 unsigned e
= d
->perm
[i
];
36550 if (!(e
== i
|| e
== i
+ nelt
))
36557 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
36558 decision should be extracted elsewhere, so that we only try that
36559 sequence once all budget==3 options have been tried. */
36560 target
= d
->target
;
36573 for (i
= 0; i
< nelt
; ++i
)
36574 mask
|= (d
->perm
[i
] >= nelt
) << i
;
36578 for (i
= 0; i
< 2; ++i
)
36579 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
36584 for (i
= 0; i
< 4; ++i
)
36585 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
36590 /* See if bytes move in pairs so we can use pblendw with
36591 an immediate argument, rather than pblendvb with a vector
36593 for (i
= 0; i
< 16; i
+= 2)
36594 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
36597 for (i
= 0; i
< nelt
; ++i
)
36598 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
36601 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
36602 vperm
= force_reg (vmode
, vperm
);
36604 if (GET_MODE_SIZE (vmode
) == 16)
36605 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
36607 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
36611 for (i
= 0; i
< 8; ++i
)
36612 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
36617 target
= gen_lowpart (vmode
, target
);
36618 op0
= gen_lowpart (vmode
, op0
);
36619 op1
= gen_lowpart (vmode
, op1
);
36623 /* See if bytes move in pairs. If not, vpblendvb must be used. */
36624 for (i
= 0; i
< 32; i
+= 2)
36625 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
36627 /* See if bytes move in quadruplets. If yes, vpblendd
36628 with immediate can be used. */
36629 for (i
= 0; i
< 32; i
+= 4)
36630 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
36634 /* See if bytes move the same in both lanes. If yes,
36635 vpblendw with immediate can be used. */
36636 for (i
= 0; i
< 16; i
+= 2)
36637 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
36640 /* Use vpblendw. */
36641 for (i
= 0; i
< 16; ++i
)
36642 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
36647 /* Use vpblendd. */
36648 for (i
= 0; i
< 8; ++i
)
36649 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
36654 /* See if words move in pairs. If yes, vpblendd can be used. */
36655 for (i
= 0; i
< 16; i
+= 2)
36656 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
36660 /* See if words move the same in both lanes. If not,
36661 vpblendvb must be used. */
36662 for (i
= 0; i
< 8; i
++)
36663 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
36665 /* Use vpblendvb. */
36666 for (i
= 0; i
< 32; ++i
)
36667 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
36671 target
= gen_lowpart (vmode
, target
);
36672 op0
= gen_lowpart (vmode
, op0
);
36673 op1
= gen_lowpart (vmode
, op1
);
36674 goto finish_pblendvb
;
36677 /* Use vpblendw. */
36678 for (i
= 0; i
< 16; ++i
)
36679 mask
|= (d
->perm
[i
] >= 16) << i
;
36683 /* Use vpblendd. */
36684 for (i
= 0; i
< 8; ++i
)
36685 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
36690 /* Use vpblendd. */
36691 for (i
= 0; i
< 4; ++i
)
36692 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
36697 gcc_unreachable ();
36700 /* This matches five different patterns with the different modes. */
36701 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
36702 x
= gen_rtx_SET (VOIDmode
, target
, x
);
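/* As an illustrative example (selector values invented here, not taken
   from a particular caller): for a V8HImode blend with selector
   { 0, 9, 2, 11, 4, 13, 6, 15 }, bit I of the pblendw immediate is set
   exactly when d->perm[I] >= nelt, so the mask loop above yields
   mask = 0xaa.  */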
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
	return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
	 from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
	e -= (8 + 4);
      else if (e >= 4)
	e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}
/* Return true if permutation D can be performed as VMODE permutation
   instead.  */

static bool
valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
{
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
    return false;

  if (GET_MODE_NUNITS (vmode) >= d->nelt)
    return true;

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
	if (d->perm[i] + j != d->perm[i + j])
	  return false;

  return true;
}
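/* Illustrative example (hypothetical permutation): the V16QImode
   selector { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 } is valid as a
   V4SImode permutation; chunk = 16 / 4 = 4, every d->perm[i] at a chunk
   boundary is a multiple of 4 and each chunk is consecutive, so the
   same shuffle can be carried out as the V4SImode selector
   { 1, 0, 3, 2 }.  */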
36784 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36785 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
36788 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
36790 unsigned i
, nelt
, eltsz
, mask
;
36791 unsigned char perm
[32];
36792 enum machine_mode vmode
= V16QImode
;
36793 rtx rperm
[32], vperm
, target
, op0
, op1
;
36797 if (!d
->one_operand_p
)
36799 if (!TARGET_XOP
|| GET_MODE_SIZE (d
->vmode
) != 16)
36802 && valid_perm_using_mode_p (V2TImode
, d
))
36807 /* Use vperm2i128 insn. The pattern uses
36808 V4DImode instead of V2TImode. */
36809 target
= gen_lowpart (V4DImode
, d
->target
);
36810 op0
= gen_lowpart (V4DImode
, d
->op0
);
36811 op1
= gen_lowpart (V4DImode
, d
->op1
);
36813 = GEN_INT (((d
->perm
[0] & (nelt
/ 2)) ? 1 : 0)
36814 || ((d
->perm
[nelt
/ 2] & (nelt
/ 2)) ? 2 : 0));
36815 emit_insn (gen_avx2_permv2ti (target
, op0
, op1
, rperm
[0]));
36823 if (GET_MODE_SIZE (d
->vmode
) == 16)
36828 else if (GET_MODE_SIZE (d
->vmode
) == 32)
36833 /* V4DImode should be already handled through
36834 expand_vselect by vpermq instruction. */
36835 gcc_assert (d
->vmode
!= V4DImode
);
36838 if (d
->vmode
== V8SImode
36839 || d
->vmode
== V16HImode
36840 || d
->vmode
== V32QImode
)
36842 /* First see if vpermq can be used for
36843 V8SImode/V16HImode/V32QImode. */
36844 if (valid_perm_using_mode_p (V4DImode
, d
))
36846 for (i
= 0; i
< 4; i
++)
36847 perm
[i
] = (d
->perm
[i
* nelt
/ 4] * 4 / nelt
) & 3;
36850 return expand_vselect (gen_lowpart (V4DImode
, d
->target
),
36851 gen_lowpart (V4DImode
, d
->op0
),
36855 /* Next see if vpermd can be used. */
36856 if (valid_perm_using_mode_p (V8SImode
, d
))
36859 /* Or if vpermps can be used. */
36860 else if (d
->vmode
== V8SFmode
)
36863 if (vmode
== V32QImode
)
36865 /* vpshufb only works intra lanes, it is not
36866 possible to shuffle bytes in between the lanes. */
36867 for (i
= 0; i
< nelt
; ++i
)
36868 if ((d
->perm
[i
] ^ i
) & (nelt
/ 2))
36879 if (vmode
== V8SImode
)
36880 for (i
= 0; i
< 8; ++i
)
36881 rperm
[i
] = GEN_INT ((d
->perm
[i
* nelt
/ 8] * 8 / nelt
) & 7);
36884 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
36885 if (!d
->one_operand_p
)
36886 mask
= 2 * nelt
- 1;
36887 else if (vmode
== V16QImode
)
36890 mask
= nelt
/ 2 - 1;
36892 for (i
= 0; i
< nelt
; ++i
)
36894 unsigned j
, e
= d
->perm
[i
] & mask
;
36895 for (j
= 0; j
< eltsz
; ++j
)
36896 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
36900 vperm
= gen_rtx_CONST_VECTOR (vmode
,
36901 gen_rtvec_v (GET_MODE_NUNITS (vmode
), rperm
));
36902 vperm
= force_reg (vmode
, vperm
);
36904 target
= gen_lowpart (vmode
, d
->target
);
36905 op0
= gen_lowpart (vmode
, d
->op0
);
36906 if (d
->one_operand_p
)
36908 if (vmode
== V16QImode
)
36909 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
36910 else if (vmode
== V32QImode
)
36911 emit_insn (gen_avx2_pshufbv32qi3 (target
, op0
, vperm
));
36912 else if (vmode
== V8SFmode
)
36913 emit_insn (gen_avx2_permvarv8sf (target
, op0
, vperm
));
36915 emit_insn (gen_avx2_permvarv8si (target
, op0
, vperm
));
36919 op1
= gen_lowpart (vmode
, d
->op1
);
36920 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
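/* Worked example (values chosen only for illustration): placing element
   5 of the concatenated input into result position 2 of a V4SImode
   permutation expands, at the byte level, to rperm[8..11] = 20..23,
   i.e. e * eltsz + j with e = 5 and eltsz = 4, which is what the
   pshufb/vpperm control vector built above encodes.  */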
36926 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
36927 in a single instruction. */
36930 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
36932 unsigned i
, nelt
= d
->nelt
;
36933 unsigned char perm2
[MAX_VECT_LEN
];
36935 /* Check plain VEC_SELECT first, because AVX has instructions that could
36936 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
36937 input where SEL+CONCAT may not. */
36938 if (d
->one_operand_p
)
36940 int mask
= nelt
- 1;
36941 bool identity_perm
= true;
36942 bool broadcast_perm
= true;
36944 for (i
= 0; i
< nelt
; i
++)
36946 perm2
[i
] = d
->perm
[i
] & mask
;
36948 identity_perm
= false;
36950 broadcast_perm
= false;
36956 emit_move_insn (d
->target
, d
->op0
);
36959 else if (broadcast_perm
&& TARGET_AVX2
)
36961 /* Use vpbroadcast{b,w,d}. */
36962 rtx (*gen
) (rtx
, rtx
) = NULL
;
36966 gen
= gen_avx2_pbroadcastv32qi_1
;
36969 gen
= gen_avx2_pbroadcastv16hi_1
;
36972 gen
= gen_avx2_pbroadcastv8si_1
;
36975 gen
= gen_avx2_pbroadcastv16qi
;
36978 gen
= gen_avx2_pbroadcastv8hi
;
36981 gen
= gen_avx2_vec_dupv8sf_1
;
36983 /* For other modes prefer other shuffles this function creates. */
36989 emit_insn (gen (d
->target
, d
->op0
));
36994 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
, d
->testing_p
))
36997 /* There are plenty of patterns in sse.md that are written for
36998 SEL+CONCAT and are not replicated for a single op. Perhaps
36999 that should be changed, to avoid the nastiness here. */
37001 /* Recognize interleave style patterns, which means incrementing
37002 every other permutation operand. */
37003 for (i
= 0; i
< nelt
; i
+= 2)
37005 perm2
[i
] = d
->perm
[i
] & mask
;
37006 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
37008 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
37012 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
37015 for (i
= 0; i
< nelt
; i
+= 4)
37017 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
37018 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
37019 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
37020 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
37023 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
37029 /* Finally, try the fully general two operand permute. */
37030 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
,
37034 /* Recognize interleave style patterns with reversed operands. */
37035 if (!d
->one_operand_p
)
37037 for (i
= 0; i
< nelt
; ++i
)
37039 unsigned e
= d
->perm
[i
];
37047 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
,
37052 /* Try the SSE4.1 blend variable merge instructions. */
37053 if (expand_vec_perm_blend (d
))
37056 /* Try one of the AVX vpermil variable permutations. */
37057 if (expand_vec_perm_vpermil (d
))
37060 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
37061 vpshufb, vpermd, vpermps or vpermq variable permutation. */
37062 if (expand_vec_perm_pshufb (d
))
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || !d->one_operand_p)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
  gcc_assert (ok);

  return true;
}
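/* Example for illustration (hypothetical selector): the V8HImode
   permutation { 2, 0, 3, 1, 5, 7, 4, 6 } keeps the low four elements in
   the low quadword and the high four in the high quadword, so it is
   emitted as pshuflw with { 2, 0, 3, 1 } followed by pshufhw with
   { 5, 7, 4, 6 }.  */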
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
	min = e;
      if (e > max)
	max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
				  gen_lowpart (TImode, d->op1),
				  gen_lowpart (TImode, d->op0), shift));

  d->op0 = d->op1 = d->target;
  d->one_operand_p = true;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i] - min;
      if (e != i)
	in_order = false;
      d->perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    return true;

  ok = expand_vec_perm_1 (d);
  gcc_assert (ok);

  return ok;
}

static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
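/* Illustrative example for expand_vec_perm_palignr above (hypothetical
   input): for a V4SImode selector { 3, 4, 5, 6 }, min = 3 and max = 6,
   so a palignr by 3 * 32 = 96 bits (12 bytes) is emitted; the adjusted
   permutation is then { 0, 1, 2, 3 }, the degenerate in-order case, and
   no further shuffle is needed.  */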
37174 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
37175 a two vector permutation into a single vector permutation by using
37176 an interleave operation to merge the vectors. */
37179 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
37181 struct expand_vec_perm_d dremap
, dfinal
;
37182 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
37183 unsigned HOST_WIDE_INT contents
;
37184 unsigned char remap
[2 * MAX_VECT_LEN
];
37186 bool ok
, same_halves
= false;
37188 if (GET_MODE_SIZE (d
->vmode
) == 16)
37190 if (d
->one_operand_p
)
37193 else if (GET_MODE_SIZE (d
->vmode
) == 32)
37197 /* For 32-byte modes allow even d->one_operand_p.
37198 The lack of cross-lane shuffling in some instructions
37199 might prevent a single insn shuffle. */
37201 dfinal
.testing_p
= true;
37202 /* If expand_vec_perm_interleave3 can expand this into
37203 a 3 insn sequence, give up and let it be expanded as
37204 3 insn sequence. While that is one insn longer,
37205 it doesn't need a memory operand and in the common
37206 case that both interleave low and high permutations
37207 with the same operands are adjacent needs 4 insns
37208 for both after CSE. */
37209 if (expand_vec_perm_interleave3 (&dfinal
))
37215 /* Examine from whence the elements come. */
37217 for (i
= 0; i
< nelt
; ++i
)
37218 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
37220 memset (remap
, 0xff, sizeof (remap
));
37223 if (GET_MODE_SIZE (d
->vmode
) == 16)
37225 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
37227 /* Split the two input vectors into 4 halves. */
37228 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
37233 /* If the elements from the low halves use interleave low, and similarly
37234 for interleave high. If the elements are from mis-matched halves, we
37235 can use shufps for V4SF/V4SI or do a DImode shuffle. */
37236 if ((contents
& (h1
| h3
)) == contents
)
37239 for (i
= 0; i
< nelt2
; ++i
)
37242 remap
[i
+ nelt
] = i
* 2 + 1;
37243 dremap
.perm
[i
* 2] = i
;
37244 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
37246 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
37247 dremap
.vmode
= V4SFmode
;
37249 else if ((contents
& (h2
| h4
)) == contents
)
37252 for (i
= 0; i
< nelt2
; ++i
)
37254 remap
[i
+ nelt2
] = i
* 2;
37255 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
37256 dremap
.perm
[i
* 2] = i
+ nelt2
;
37257 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
37259 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
37260 dremap
.vmode
= V4SFmode
;
37262 else if ((contents
& (h1
| h4
)) == contents
)
37265 for (i
= 0; i
< nelt2
; ++i
)
37268 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
37269 dremap
.perm
[i
] = i
;
37270 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
37275 dremap
.vmode
= V2DImode
;
37277 dremap
.perm
[0] = 0;
37278 dremap
.perm
[1] = 3;
37281 else if ((contents
& (h2
| h3
)) == contents
)
37284 for (i
= 0; i
< nelt2
; ++i
)
37286 remap
[i
+ nelt2
] = i
;
37287 remap
[i
+ nelt
] = i
+ nelt2
;
37288 dremap
.perm
[i
] = i
+ nelt2
;
37289 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
37294 dremap
.vmode
= V2DImode
;
37296 dremap
.perm
[0] = 1;
37297 dremap
.perm
[1] = 2;
37305 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
37306 unsigned HOST_WIDE_INT q
[8];
37307 unsigned int nonzero_halves
[4];
37309 /* Split the two input vectors into 8 quarters. */
37310 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
37311 for (i
= 1; i
< 8; ++i
)
37312 q
[i
] = q
[0] << (nelt4
* i
);
37313 for (i
= 0; i
< 4; ++i
)
37314 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
37316 nonzero_halves
[nzcnt
] = i
;
37322 gcc_assert (d
->one_operand_p
);
37323 nonzero_halves
[1] = nonzero_halves
[0];
37324 same_halves
= true;
37326 else if (d
->one_operand_p
)
37328 gcc_assert (nonzero_halves
[0] == 0);
37329 gcc_assert (nonzero_halves
[1] == 1);
37334 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
37336 /* Attempt to increase the likelihood that dfinal
37337 shuffle will be intra-lane. */
37338 char tmph
= nonzero_halves
[0];
37339 nonzero_halves
[0] = nonzero_halves
[1];
37340 nonzero_halves
[1] = tmph
;
37343 /* vperm2f128 or vperm2i128. */
37344 for (i
= 0; i
< nelt2
; ++i
)
37346 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
37347 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
37348 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
37349 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
37352 if (d
->vmode
!= V8SFmode
37353 && d
->vmode
!= V4DFmode
37354 && d
->vmode
!= V8SImode
)
37356 dremap
.vmode
= V8SImode
;
37358 for (i
= 0; i
< 4; ++i
)
37360 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
37361 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
37365 else if (d
->one_operand_p
)
37367 else if (TARGET_AVX2
37368 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
37371 for (i
= 0; i
< nelt4
; ++i
)
37374 remap
[i
+ nelt
] = i
* 2 + 1;
37375 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
37376 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
37377 dremap
.perm
[i
* 2] = i
;
37378 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
37379 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
37380 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
37383 else if (TARGET_AVX2
37384 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
37387 for (i
= 0; i
< nelt4
; ++i
)
37389 remap
[i
+ nelt4
] = i
* 2;
37390 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
37391 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
37392 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
37393 dremap
.perm
[i
* 2] = i
+ nelt4
;
37394 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
37395 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
37396 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
37403 /* Use the remapping array set up above to move the elements from their
37404 swizzled locations into their final destinations. */
37406 for (i
= 0; i
< nelt
; ++i
)
37408 unsigned e
= remap
[d
->perm
[i
]];
37409 gcc_assert (e
< nelt
);
37410 /* If same_halves is true, both halves of the remapped vector are the
37411 same. Avoid cross-lane accesses if possible. */
37412 if (same_halves
&& i
>= nelt2
)
37414 gcc_assert (e
< nelt2
);
37415 dfinal
.perm
[i
] = e
+ nelt2
;
37418 dfinal
.perm
[i
] = e
;
37420 dfinal
.op0
= gen_reg_rtx (dfinal
.vmode
);
37421 dfinal
.op1
= dfinal
.op0
;
37422 dfinal
.one_operand_p
= true;
37423 dremap
.target
= dfinal
.op0
;
37425 /* Test if the final remap can be done with a single insn. For V4SFmode or
37426 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
37428 ok
= expand_vec_perm_1 (&dfinal
);
37429 seq
= get_insns ();
37438 if (dremap
.vmode
!= dfinal
.vmode
)
37440 dremap
.target
= gen_lowpart (dremap
.vmode
, dremap
.target
);
37441 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
37442 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
37445 ok
= expand_vec_perm_1 (&dremap
);
37452 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
37453 a single vector cross-lane permutation into vpermq followed
37454 by any of the single insn permutations. */
37457 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
37459 struct expand_vec_perm_d dremap
, dfinal
;
37460 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
37461 unsigned contents
[2];
37465 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
37466 && d
->one_operand_p
))
37471 for (i
= 0; i
< nelt2
; ++i
)
37473 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
37474 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
37477 for (i
= 0; i
< 2; ++i
)
37479 unsigned int cnt
= 0;
37480 for (j
= 0; j
< 4; ++j
)
37481 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
37489 dremap
.vmode
= V4DImode
;
37491 dremap
.target
= gen_reg_rtx (V4DImode
);
37492 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
37493 dremap
.op1
= dremap
.op0
;
37494 dremap
.one_operand_p
= true;
37495 for (i
= 0; i
< 2; ++i
)
37497 unsigned int cnt
= 0;
37498 for (j
= 0; j
< 4; ++j
)
37499 if ((contents
[i
] & (1u << j
)) != 0)
37500 dremap
.perm
[2 * i
+ cnt
++] = j
;
37501 for (; cnt
< 2; ++cnt
)
37502 dremap
.perm
[2 * i
+ cnt
] = 0;
37506 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
37507 dfinal
.op1
= dfinal
.op0
;
37508 dfinal
.one_operand_p
= true;
37509 for (i
= 0, j
= 0; i
< nelt
; ++i
)
37513 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
37514 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
37516 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
37517 dfinal
.perm
[i
] |= nelt4
;
37519 gcc_unreachable ();
37522 ok
= expand_vec_perm_1 (&dremap
);
37525 ok
= expand_vec_perm_1 (&dfinal
);
37531 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
37532 a vector permutation using two instructions, vperm2f128 resp.
37533 vperm2i128 followed by any single in-lane permutation. */
37536 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d
*d
)
37538 struct expand_vec_perm_d dfirst
, dsecond
;
37539 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, perm
;
37543 || GET_MODE_SIZE (d
->vmode
) != 32
37544 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
&& !TARGET_AVX2
))
37548 dsecond
.one_operand_p
= false;
37549 dsecond
.testing_p
= true;
37551 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
37552 immediate. For perm < 16 the second permutation uses
37553 d->op0 as first operand, for perm >= 16 it uses d->op1
37554 as first operand. The second operand is the result of
37556 for (perm
= 0; perm
< 32; perm
++)
37558 /* Ignore permutations which do not move anything cross-lane. */
37561 /* The second shuffle for e.g. V4DFmode has
37562 0123 and ABCD operands.
37563 Ignore AB23, as 23 is already in the second lane
37564 of the first operand. */
37565 if ((perm
& 0xc) == (1 << 2)) continue;
37566 /* And 01CD, as 01 is in the first lane of the first
37568 if ((perm
& 3) == 0) continue;
37569 /* And 4567, as then the vperm2[fi]128 doesn't change
37570 anything on the original 4567 second operand. */
37571 if ((perm
& 0xf) == ((3 << 2) | 2)) continue;
37575 /* The second shuffle for e.g. V4DFmode has
37576 4567 and ABCD operands.
37577 Ignore AB67, as 67 is already in the second lane
37578 of the first operand. */
37579 if ((perm
& 0xc) == (3 << 2)) continue;
37580 /* And 45CD, as 45 is in the first lane of the first
37582 if ((perm
& 3) == 2) continue;
37583 /* And 0123, as then the vperm2[fi]128 doesn't change
37584 anything on the original 0123 first operand. */
37585 if ((perm
& 0xf) == (1 << 2)) continue;
37588 for (i
= 0; i
< nelt
; i
++)
37590 j
= d
->perm
[i
] / nelt2
;
37591 if (j
== ((perm
>> (2 * (i
>= nelt2
))) & 3))
37592 dsecond
.perm
[i
] = nelt
+ (i
& nelt2
) + (d
->perm
[i
] & (nelt2
- 1));
37593 else if (j
== (unsigned) (i
>= nelt2
) + 2 * (perm
>= 16))
37594 dsecond
.perm
[i
] = d
->perm
[i
] & (nelt
- 1);
37602 ok
= expand_vec_perm_1 (&dsecond
);
37613 /* Found a usable second shuffle. dfirst will be
37614 vperm2f128 on d->op0 and d->op1. */
37615 dsecond
.testing_p
= false;
37617 dfirst
.target
= gen_reg_rtx (d
->vmode
);
37618 for (i
= 0; i
< nelt
; i
++)
37619 dfirst
.perm
[i
] = (i
& (nelt2
- 1))
37620 + ((perm
>> (2 * (i
>= nelt2
))) & 3) * nelt2
;
37622 ok
= expand_vec_perm_1 (&dfirst
);
37625 /* And dsecond is some single insn shuffle, taking
37626 d->op0 and result of vperm2f128 (if perm < 16) or
37627 d->op1 and result of vperm2f128 (otherwise). */
37628 dsecond
.op1
= dfirst
.target
;
37630 dsecond
.op0
= dfirst
.op1
;
37632 ok
= expand_vec_perm_1 (&dsecond
);
37638 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
37639 if (d
->one_operand_p
)
37646 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
37647 a two vector permutation using 2 intra-lane interleave insns
37648 and cross-lane shuffle for 32-byte vectors. */
37651 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
37654 rtx (*gen
) (rtx
, rtx
, rtx
);
37656 if (d
->one_operand_p
)
37658 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
37660 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
37666 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
37668 for (i
= 0; i
< nelt
; i
+= 2)
37669 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
37670 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
37680 gen
= gen_vec_interleave_highv32qi
;
37682 gen
= gen_vec_interleave_lowv32qi
;
37686 gen
= gen_vec_interleave_highv16hi
;
37688 gen
= gen_vec_interleave_lowv16hi
;
37692 gen
= gen_vec_interleave_highv8si
;
37694 gen
= gen_vec_interleave_lowv8si
;
37698 gen
= gen_vec_interleave_highv4di
;
37700 gen
= gen_vec_interleave_lowv4di
;
37704 gen
= gen_vec_interleave_highv8sf
;
37706 gen
= gen_vec_interleave_lowv8sf
;
37710 gen
= gen_vec_interleave_highv4df
;
37712 gen
= gen_vec_interleave_lowv4df
;
37715 gcc_unreachable ();
37718 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
37722 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
37723 a single vector permutation using a single intra-lane vector
37724 permutation, vperm2f128 swapping the lanes and vblend* insn blending
37725 the non-swapped and swapped vectors together. */
37728 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d
*d
)
37730 struct expand_vec_perm_d dfirst
, dsecond
;
37731 unsigned i
, j
, msk
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
37734 rtx (*blend
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
37738 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
)
37739 || !d
->one_operand_p
)
37743 for (i
= 0; i
< nelt
; i
++)
37744 dfirst
.perm
[i
] = 0xff;
37745 for (i
= 0, msk
= 0; i
< nelt
; i
++)
37747 j
= (d
->perm
[i
] & nelt2
) ? i
| nelt2
: i
& ~nelt2
;
37748 if (dfirst
.perm
[j
] != 0xff && dfirst
.perm
[j
] != d
->perm
[i
])
37750 dfirst
.perm
[j
] = d
->perm
[i
];
37754 for (i
= 0; i
< nelt
; i
++)
37755 if (dfirst
.perm
[i
] == 0xff)
37756 dfirst
.perm
[i
] = i
;
37759 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
37762 ok
= expand_vec_perm_1 (&dfirst
);
37763 seq
= get_insns ();
37775 dsecond
.op0
= dfirst
.target
;
37776 dsecond
.op1
= dfirst
.target
;
37777 dsecond
.one_operand_p
= true;
37778 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
37779 for (i
= 0; i
< nelt
; i
++)
37780 dsecond
.perm
[i
] = i
^ nelt2
;
37782 ok
= expand_vec_perm_1 (&dsecond
);
37785 blend
= d
->vmode
== V8SFmode
? gen_avx_blendps256
: gen_avx_blendpd256
;
37786 emit_insn (blend (d
->target
, dfirst
.target
, dsecond
.target
, GEN_INT (msk
)));
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement a V4DF
   permutation using two vperm2f128, followed by a vshufpd insn blending
   the two vectors together.  */

static bool
expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond, dthird;
  bool ok;

  if (!TARGET_AVX || (d->vmode != V4DFmode))
    return false;

  if (d->testing_p)
    return true;

  dfirst = *d;
  dsecond = *d;
  dthird = *d;

  dfirst.perm[0] = (d->perm[0] & ~1);
  dfirst.perm[1] = (d->perm[0] & ~1) + 1;
  dfirst.perm[2] = (d->perm[2] & ~1);
  dfirst.perm[3] = (d->perm[2] & ~1) + 1;
  dsecond.perm[0] = (d->perm[1] & ~1);
  dsecond.perm[1] = (d->perm[1] & ~1) + 1;
  dsecond.perm[2] = (d->perm[3] & ~1);
  dsecond.perm[3] = (d->perm[3] & ~1) + 1;
  dthird.perm[0] = (d->perm[0] % 2);
  dthird.perm[1] = (d->perm[1] % 2) + 4;
  dthird.perm[2] = (d->perm[2] % 2) + 2;
  dthird.perm[3] = (d->perm[3] % 2) + 6;

  dfirst.target = gen_reg_rtx (dfirst.vmode);
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  dthird.op0 = dfirst.target;
  dthird.op1 = dsecond.target;
  dthird.one_operand_p = false;

  canonicalize_perm (&dfirst);
  canonicalize_perm (&dsecond);

  ok = expand_vec_perm_1 (&dfirst)
       && expand_vec_perm_1 (&dsecond)
       && expand_vec_perm_1 (&dthird);

  gcc_assert (ok);

  return true;
}
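/* Decomposition example (selector invented for illustration): for the
   V4DF permutation { 2, 5, 1, 6 }, dfirst becomes { 2, 3, 0, 1 } (a
   lane-swapping vperm2f128), dsecond becomes { 4, 5, 6, 7 }, and dthird
   becomes { 0, 5, 3, 6 }, which picks alternately from dfirst.target
   and dsecond.target and is exactly the in-lane pattern a single
   vshufpd can blend.  */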
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (!d->one_operand_p);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
	e -= nelt;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
	  rperm[1-which][i*eltsz + j] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = gen_lowpart (V16QImode, d->target);
  emit_insn (gen_iorv16qi3 (op, l, h));

  return true;
}
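/* Worked example (indices invented for illustration): with V8HImode
   operands, if d->perm[3] == 10 then element 3 must come from op1, so
   in the mask applied to op1 result bytes 6 and 7 select source bytes
   4 and 5 (e = 10 - 8 = 2, eltsz = 2), while the same byte positions in
   the mask applied to op0 hold -128, making pshufb write zeros there;
   the final por merges the two half results.  */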
37897 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
37898 with two vpshufb insns, vpermq and vpor. We should have already failed
37899 all two or three instruction sequences. */
37902 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
37904 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
37905 unsigned int i
, nelt
, eltsz
;
37908 || !d
->one_operand_p
37909 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
37916 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
37918 /* Generate two permutation masks. If the required element is within
37919 the same lane, it is shuffled in. If the required element from the
37920 other lane, force a zero by setting bit 7 in the permutation mask.
37921 In the other mask the mask has non-negative elements if element
37922 is requested from the other lane, but also moved to the other lane,
37923 so that the result of vpshufb can have the two V2TImode halves
37925 m128
= GEN_INT (-128);
37926 for (i
= 0; i
< nelt
; ++i
)
37928 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
37929 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
37931 for (j
= 0; j
< eltsz
; ++j
)
37933 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
37934 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
37938 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
37939 vperm
= force_reg (V32QImode
, vperm
);
37941 h
= gen_reg_rtx (V32QImode
);
37942 op
= gen_lowpart (V32QImode
, d
->op0
);
37943 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
37945 /* Swap the 128-byte lanes of h into hp. */
37946 hp
= gen_reg_rtx (V4DImode
);
37947 op
= gen_lowpart (V4DImode
, h
);
37948 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
37951 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
37952 vperm
= force_reg (V32QImode
, vperm
);
37954 l
= gen_reg_rtx (V32QImode
);
37955 op
= gen_lowpart (V32QImode
, d
->op0
);
37956 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
37958 op
= gen_lowpart (V32QImode
, d
->target
);
37959 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
37964 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
37965 and extract-odd permutations of two V32QImode and V16QImode operand
37966 with two vpshufb insns, vpor and vpermq. We should have already
37967 failed all two or three instruction sequences. */
37970 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
37972 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
37973 unsigned int i
, nelt
, eltsz
;
37976 || d
->one_operand_p
37977 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
37980 for (i
= 0; i
< d
->nelt
; ++i
)
37981 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
37988 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
37990 /* Generate two permutation masks. In the first permutation mask
37991 the first quarter will contain indexes for the first half
37992 of the op0, the second quarter will contain bit 7 set, third quarter
37993 will contain indexes for the second half of the op0 and the
37994 last quarter bit 7 set. In the second permutation mask
37995 the first quarter will contain bit 7 set, the second quarter
37996 indexes for the first half of the op1, the third quarter bit 7 set
37997 and last quarter indexes for the second half of the op1.
37998 I.e. the first mask e.g. for V32QImode extract even will be:
37999 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
38000 (all values masked with 0xf except for -128) and second mask
38001 for extract even will be
38002 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
38003 m128
= GEN_INT (-128);
38004 for (i
= 0; i
< nelt
; ++i
)
38006 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
38007 unsigned which
= d
->perm
[i
] >= nelt
;
38008 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
38010 for (j
= 0; j
< eltsz
; ++j
)
38012 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
38013 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
38017 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
38018 vperm
= force_reg (V32QImode
, vperm
);
38020 l
= gen_reg_rtx (V32QImode
);
38021 op
= gen_lowpart (V32QImode
, d
->op0
);
38022 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
38024 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
38025 vperm
= force_reg (V32QImode
, vperm
);
38027 h
= gen_reg_rtx (V32QImode
);
38028 op
= gen_lowpart (V32QImode
, d
->op1
);
38029 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
38031 ior
= gen_reg_rtx (V32QImode
);
38032 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
38034 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
38035 op
= gen_lowpart (V4DImode
, d
->target
);
38036 ior
= gen_lowpart (V4DImode
, ior
);
38037 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
38038 const1_rtx
, GEN_INT (3)));
38043 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
38044 and extract-odd permutations. */
38047 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
38054 t1
= gen_reg_rtx (V4DFmode
);
38055 t2
= gen_reg_rtx (V4DFmode
);
38057 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
38058 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
38059 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
38061 /* Now an unpck[lh]pd will produce the result required. */
38063 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
38065 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
38071 int mask
= odd
? 0xdd : 0x88;
38073 t1
= gen_reg_rtx (V8SFmode
);
38074 t2
= gen_reg_rtx (V8SFmode
);
38075 t3
= gen_reg_rtx (V8SFmode
);
38077 /* Shuffle within the 128-bit lanes to produce:
38078 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
38079 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
38082 /* Shuffle the lanes around to produce:
38083 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
38084 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
38087 /* Shuffle within the 128-bit lanes to produce:
38088 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
38089 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
38091 /* Shuffle within the 128-bit lanes to produce:
38092 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
38093 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
38095 /* Shuffle the lanes around to produce:
38096 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
38097 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
38106 /* These are always directly implementable by expand_vec_perm_1. */
38107 gcc_unreachable ();
38111 return expand_vec_perm_pshufb2 (d
);
38114 /* We need 2*log2(N)-1 operations to achieve odd/even
38115 with interleave. */
38116 t1
= gen_reg_rtx (V8HImode
);
38117 t2
= gen_reg_rtx (V8HImode
);
38118 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
38119 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
38120 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
38121 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
38123 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
38125 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
38132 return expand_vec_perm_pshufb2 (d
);
38135 t1
= gen_reg_rtx (V16QImode
);
38136 t2
= gen_reg_rtx (V16QImode
);
38137 t3
= gen_reg_rtx (V16QImode
);
38138 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
38139 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
38140 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
38141 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
38142 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
38143 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
38145 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
38147 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
38154 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
38159 struct expand_vec_perm_d d_copy
= *d
;
38160 d_copy
.vmode
= V4DFmode
;
38161 d_copy
.target
= gen_lowpart (V4DFmode
, d
->target
);
38162 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
38163 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
38164 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
38167 t1
= gen_reg_rtx (V4DImode
);
38168 t2
= gen_reg_rtx (V4DImode
);
38170 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
38171 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
38172 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
38174 /* Now an vpunpck[lh]qdq will produce the result required. */
38176 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
38178 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
38185 struct expand_vec_perm_d d_copy
= *d
;
38186 d_copy
.vmode
= V8SFmode
;
38187 d_copy
.target
= gen_lowpart (V8SFmode
, d
->target
);
38188 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
38189 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
38190 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
38193 t1
= gen_reg_rtx (V8SImode
);
38194 t2
= gen_reg_rtx (V8SImode
);
38196 /* Shuffle the lanes around into
38197 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
38198 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t1
),
38199 gen_lowpart (V4DImode
, d
->op0
),
38200 gen_lowpart (V4DImode
, d
->op1
),
38202 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t2
),
38203 gen_lowpart (V4DImode
, d
->op0
),
38204 gen_lowpart (V4DImode
, d
->op1
),
38207 /* Swap the 2nd and 3rd position in each lane into
38208 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
38209 emit_insn (gen_avx2_pshufdv3 (t1
, t1
,
38210 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
38211 emit_insn (gen_avx2_pshufdv3 (t2
, t2
,
38212 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
38214 /* Now an vpunpck[lh]qdq will produce
38215 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
38217 t3
= gen_avx2_interleave_highv4di (gen_lowpart (V4DImode
, d
->target
),
38218 gen_lowpart (V4DImode
, t1
),
38219 gen_lowpart (V4DImode
, t2
));
38221 t3
= gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode
, d
->target
),
38222 gen_lowpart (V4DImode
, t1
),
38223 gen_lowpart (V4DImode
, t2
));
38228 gcc_unreachable ();
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
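/* For illustration: the V8HImode extract-even selector is
   { 0, 2, 4, 6, 8, 10, 12, 14 } (odd == 0) and the extract-odd selector
   is { 1, 3, 5, 7, 9, 11, 13, 15 } (odd == 1); any other selector fails
   the loop above and is rejected here.  */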
38253 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
38254 permutations. We assume that expand_vec_perm_1 has already failed. */
38257 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
38259 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
38260 enum machine_mode vmode
= d
->vmode
;
38261 unsigned char perm2
[4];
38269 /* These are special-cased in sse.md so that we can optionally
38270 use the vbroadcast instruction. They expand to two insns
38271 if the input happens to be in a register. */
38272 gcc_unreachable ();
38278 /* These are always implementable using standard shuffle patterns. */
38279 gcc_unreachable ();
38283 /* These can be implemented via interleave. We save one insn by
38284 stopping once we have promoted to V4SImode and then use pshufd. */
38288 rtx (*gen
) (rtx
, rtx
, rtx
)
38289 = vmode
== V16QImode
? gen_vec_interleave_lowv16qi
38290 : gen_vec_interleave_lowv8hi
;
38294 gen
= vmode
== V16QImode
? gen_vec_interleave_highv16qi
38295 : gen_vec_interleave_highv8hi
;
38300 dest
= gen_reg_rtx (vmode
);
38301 emit_insn (gen (dest
, op0
, op0
));
38302 vmode
= get_mode_wider_vector (vmode
);
38303 op0
= gen_lowpart (vmode
, dest
);
38305 while (vmode
!= V4SImode
);
38307 memset (perm2
, elt
, 4);
38308 ok
= expand_vselect (gen_lowpart (V4SImode
, d
->target
), op0
, perm2
, 4,
38317 /* For AVX2 broadcasts of the first element vpbroadcast* or
38318 vpermq should be used by expand_vec_perm_1. */
38319 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
38323 gcc_unreachable ();
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
38346 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
38347 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
38348 all the shorter instruction sequences. */
38351 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
38353 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
38354 unsigned int i
, nelt
, eltsz
;
38358 || d
->one_operand_p
38359 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
38366 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
38368 /* Generate 4 permutation masks. If the required element is within
38369 the same lane, it is shuffled in. If the required element from the
38370 other lane, force a zero by setting bit 7 in the permutation mask.
38371 In the other mask the mask has non-negative elements if element
38372 is requested from the other lane, but also moved to the other lane,
38373 so that the result of vpshufb can have the two V2TImode halves
38375 m128
= GEN_INT (-128);
38376 for (i
= 0; i
< 32; ++i
)
38378 rperm
[0][i
] = m128
;
38379 rperm
[1][i
] = m128
;
38380 rperm
[2][i
] = m128
;
38381 rperm
[3][i
] = m128
;
38387 for (i
= 0; i
< nelt
; ++i
)
38389 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
38390 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
38391 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
38393 for (j
= 0; j
< eltsz
; ++j
)
38394 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
38395 used
[which
] = true;
38398 for (i
= 0; i
< 2; ++i
)
38400 if (!used
[2 * i
+ 1])
38405 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
38406 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
38407 vperm
= force_reg (V32QImode
, vperm
);
38408 h
[i
] = gen_reg_rtx (V32QImode
);
38409 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
38410 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
38413 /* Swap the 128-byte lanes of h[X]. */
38414 for (i
= 0; i
< 2; ++i
)
38416 if (h
[i
] == NULL_RTX
)
38418 op
= gen_reg_rtx (V4DImode
);
38419 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
38420 const2_rtx
, GEN_INT (3), const0_rtx
,
38422 h
[i
] = gen_lowpart (V32QImode
, op
);
38425 for (i
= 0; i
< 2; ++i
)
38432 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
38433 vperm
= force_reg (V32QImode
, vperm
);
38434 l
[i
] = gen_reg_rtx (V32QImode
);
38435 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
38436 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
38439 for (i
= 0; i
< 2; ++i
)
38443 op
= gen_reg_rtx (V32QImode
);
38444 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
38451 gcc_assert (l
[0] && l
[1]);
38452 op
= gen_lowpart (V32QImode
, d
->target
);
38453 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  if (expand_vec_perm_vpermq_perm_1 (d))
    return true;

  if (expand_vec_perm_vperm2f128 (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_2vperm2f128_vshuf (d))
    return true;

  if (expand_vec_perm_pshufb2 (d))
    return true;

  if (expand_vec_perm_interleave3 (d))
    return true;

  if (expand_vec_perm_vperm2f128_vblend (d))
    return true;

  /* Try sequences of four instructions.  */

  if (expand_vec_perm_vpshufb2_vpermq (d))
    return true;

  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly... */

  if (expand_vec_perm_even_odd (d))
    return true;

  /* Even longer sequences.  */
  if (expand_vec_perm_vpshufb4_vpermq2 (d))
    return true;

  return false;
}
/* If a permutation only uses one operand, make it clear.  Returns true
   if the permutation references both operands.  */

static bool
canonicalize_perm (struct expand_vec_perm_d *d)
{
  int i, which, nelt = d->nelt;

  for (i = which = 0; i < nelt; ++i)
    which |= (d->perm[i] < nelt ? 1 : 2);

  d->one_operand_p = true;
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d->op0, d->op1))
	{
	  d->one_operand_p = false;
	  break;
	}
      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */

    case 2:
      for (i = 0; i < nelt; ++i)
	d->perm[i] &= nelt - 1;
      d->op0 = d->op1;
      break;

    case 1:
      d->op1 = d->op0;
      break;
    }

  return (which == 3);
}
38574 ix86_expand_vec_perm_const (rtx operands
[4])
38576 struct expand_vec_perm_d d
;
38577 unsigned char perm
[MAX_VECT_LEN
];
38582 d
.target
= operands
[0];
38583 d
.op0
= operands
[1];
38584 d
.op1
= operands
[2];
38587 d
.vmode
= GET_MODE (d
.target
);
38588 gcc_assert (VECTOR_MODE_P (d
.vmode
));
38589 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
38590 d
.testing_p
= false;
38592 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
38593 gcc_assert (XVECLEN (sel
, 0) == nelt
);
38594 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
38596 for (i
= 0; i
< nelt
; ++i
)
38598 rtx e
= XVECEXP (sel
, 0, i
);
38599 int ei
= INTVAL (e
) & (2 * nelt
- 1);
38604 two_args
= canonicalize_perm (&d
);
38606 if (ix86_expand_vec_perm_const_1 (&d
))
38609 /* If the selector says both arguments are needed, but the operands are the
38610 same, the above tried to expand with one_operand_p and flattened selector.
38611 If that didn't work, retry without one_operand_p; we succeeded with that
38613 if (two_args
&& d
.one_operand_p
)
38615 d
.one_operand_p
= false;
38616 memcpy (d
.perm
, perm
, sizeof (perm
));
38617 return ix86_expand_vec_perm_const_1 (&d
);
38623 /* Implement targetm.vectorize.vec_perm_const_ok. */
38626 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
38627 const unsigned char *sel
)
38629 struct expand_vec_perm_d d
;
38630 unsigned int i
, nelt
, which
;
38634 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
38635 d
.testing_p
= true;
38637 /* Given sufficient ISA support we can just return true here
38638 for selected vector modes. */
38639 if (GET_MODE_SIZE (d
.vmode
) == 16)
38641 /* All implementable with a single vpperm insn. */
38644 /* All implementable with 2 pshufb + 1 ior. */
38647 /* All implementable with shufpd or unpck[lh]pd. */
38652 /* Extract the values from the vector CST into the permutation
38654 memcpy (d
.perm
, sel
, nelt
);
38655 for (i
= which
= 0; i
< nelt
; ++i
)
38657 unsigned char e
= d
.perm
[i
];
38658 gcc_assert (e
< 2 * nelt
);
38659 which
|= (e
< nelt
? 1 : 2);
38662 /* For all elements from second vector, fold the elements to first. */
38664 for (i
= 0; i
< nelt
; ++i
)
38667 /* Check whether the mask can be applied to the vector type. */
38668 d
.one_operand_p
= (which
!= 3);
38670 /* Implementable with shufps or pshufd. */
38671 if (d
.one_operand_p
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
38674 /* Otherwise we have to go through the motions and see if we can
38675 figure out how to generate the requested permutation. */
38676 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
38677 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
38678 if (!d
.one_operand_p
)
38679 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
38682 ret
= ix86_expand_vec_perm_const_1 (&d
);
void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
void
ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt, base;
  bool ok;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  base = high_p ? nelt / 2 : 0;
  for (i = 0; i < nelt / 2; ++i)
    {
      d.perm[i * 2] = i + base;
      d.perm[i * 2 + 1] = i + base + nelt;
    }

  /* Note that for AVX this isn't one instruction.  */
  ok = ix86_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
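/* For example, interleaving the high halves of two V4SImode operands
   (high_p true) builds the selector { 2, 6, 3, 7 }, which the constant
   permutation expander will typically match as a single punpckhdq.  */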
38741 /* Expand a vector operation CODE for a V*QImode in terms of the
38742 same operation on V*HImode. */
38745 ix86_expand_vecop_qihi (enum rtx_code code
, rtx dest
, rtx op1
, rtx op2
)
38747 enum machine_mode qimode
= GET_MODE (dest
);
38748 enum machine_mode himode
;
38749 rtx (*gen_il
) (rtx
, rtx
, rtx
);
38750 rtx (*gen_ih
) (rtx
, rtx
, rtx
);
38751 rtx op1_l
, op1_h
, op2_l
, op2_h
, res_l
, res_h
;
38752 struct expand_vec_perm_d d
;
38753 bool ok
, full_interleave
;
38754 bool uns_p
= false;
38761 gen_il
= gen_vec_interleave_lowv16qi
;
38762 gen_ih
= gen_vec_interleave_highv16qi
;
38765 himode
= V16HImode
;
38766 gen_il
= gen_avx2_interleave_lowv32qi
;
38767 gen_ih
= gen_avx2_interleave_highv32qi
;
38770 gcc_unreachable ();
38773 op2_l
= op2_h
= op2
;
38777 /* Unpack data such that we've got a source byte in each low byte of
38778 each word. We don't care what goes into the high byte of each word.
38779 Rather than trying to get zero in there, most convenient is to let
38780 it be a copy of the low byte. */
38781 op2_l
= gen_reg_rtx (qimode
);
38782 op2_h
= gen_reg_rtx (qimode
);
38783 emit_insn (gen_il (op2_l
, op2
, op2
));
38784 emit_insn (gen_ih (op2_h
, op2
, op2
));
38787 op1_l
= gen_reg_rtx (qimode
);
38788 op1_h
= gen_reg_rtx (qimode
);
38789 emit_insn (gen_il (op1_l
, op1
, op1
));
38790 emit_insn (gen_ih (op1_h
, op1
, op1
));
38791 full_interleave
= qimode
== V16QImode
;
38799 op1_l
= gen_reg_rtx (himode
);
38800 op1_h
= gen_reg_rtx (himode
);
38801 ix86_expand_sse_unpack (op1_l
, op1
, uns_p
, false);
38802 ix86_expand_sse_unpack (op1_h
, op1
, uns_p
, true);
38803 full_interleave
= true;
38806 gcc_unreachable ();
38809 /* Perform the operation. */
38810 res_l
= expand_simple_binop (himode
, code
, op1_l
, op2_l
, NULL_RTX
,
38812 res_h
= expand_simple_binop (himode
, code
, op1_h
, op2_h
, NULL_RTX
,
38814 gcc_assert (res_l
&& res_h
);
38816 /* Merge the data back into the right place. */
38818 d
.op0
= gen_lowpart (qimode
, res_l
);
38819 d
.op1
= gen_lowpart (qimode
, res_h
);
38821 d
.nelt
= GET_MODE_NUNITS (qimode
);
38822 d
.one_operand_p
= false;
38823 d
.testing_p
= false;
38825 if (full_interleave
)
38827 /* For SSE2, we used an full interleave, so the desired
38828 results are in the even elements. */
38829 for (i
= 0; i
< 32; ++i
)
38834 /* For AVX, the interleave used above was not cross-lane. So the
38835 extraction is evens but with the second and third quarter swapped.
38836 Happily, that is even one insn shorter than even extraction. */
38837 for (i
= 0; i
< 32; ++i
)
38838 d
.perm
[i
] = i
* 2 + ((i
& 24) == 8 ? 16 : (i
& 24) == 16 ? -16 : 0);
38841 ok
= ix86_expand_vec_perm_const_1 (&d
);
38844 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
38845 gen_rtx_fmt_ee (code
, qimode
, op1
, op2
));
38849 ix86_expand_mul_widen_evenodd (rtx dest
, rtx op1
, rtx op2
,
38850 bool uns_p
, bool odd_p
)
38852 enum machine_mode mode
= GET_MODE (op1
);
38853 enum machine_mode wmode
= GET_MODE (dest
);
38856 /* We only play even/odd games with vectors of SImode. */
38857 gcc_assert (mode
== V4SImode
|| mode
== V8SImode
);
38859 /* If we're looking for the odd results, shift those members down to
38860 the even slots. For some cpus this is faster than a PSHUFD. */
38863 if (TARGET_XOP
&& mode
== V4SImode
)
38865 x
= force_reg (wmode
, CONST0_RTX (wmode
));
38866 emit_insn (gen_xop_pmacsdqh (dest
, op1
, op2
, x
));
38870 x
= GEN_INT (GET_MODE_UNIT_BITSIZE (mode
));
38871 op1
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op1
),
38872 x
, NULL
, 1, OPTAB_DIRECT
);
38873 op2
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op2
),
38874 x
, NULL
, 1, OPTAB_DIRECT
);
38875 op1
= gen_lowpart (mode
, op1
);
38876 op2
= gen_lowpart (mode
, op2
);
38879 if (mode
== V8SImode
)
38882 x
= gen_vec_widen_umult_even_v8si (dest
, op1
, op2
);
38884 x
= gen_vec_widen_smult_even_v8si (dest
, op1
, op2
);
38887 x
= gen_vec_widen_umult_even_v4si (dest
, op1
, op2
);
38888 else if (TARGET_SSE4_1
)
38889 x
= gen_sse4_1_mulv2siv2di3 (dest
, op1
, op2
);
38892 rtx s1
, s2
, t0
, t1
, t2
;
38894 /* The easiest way to implement this without PMULDQ is to go through
38895 the motions as if we are performing a full 64-bit multiply. With
38896 the exception that we need to do less shuffling of the elements. */
38898 /* Compute the sign-extension, aka highparts, of the two operands. */
38899 s1
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
38900 op1
, pc_rtx
, pc_rtx
);
38901 s2
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
38902 op2
, pc_rtx
, pc_rtx
);
38904 /* Multiply LO(A) * HI(B), and vice-versa. */
38905 t1
= gen_reg_rtx (wmode
);
38906 t2
= gen_reg_rtx (wmode
);
38907 emit_insn (gen_vec_widen_umult_even_v4si (t1
, s1
, op2
));
38908 emit_insn (gen_vec_widen_umult_even_v4si (t2
, s2
, op1
));
38910 /* Multiply LO(A) * LO(B). */
38911 t0
= gen_reg_rtx (wmode
);
38912 emit_insn (gen_vec_widen_umult_even_v4si (t0
, op1
, op2
));
38914 /* Combine and shift the highparts into place. */
38915 t1
= expand_binop (wmode
, add_optab
, t1
, t2
, t1
, 1, OPTAB_DIRECT
);
38916 t1
= expand_binop (wmode
, ashl_optab
, t1
, GEN_INT (32), t1
,
38919 /* Combine high and low parts. */
38920 force_expand_binop (wmode
, add_optab
, t0
, t1
, dest
, 1, OPTAB_DIRECT
);
void
ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
                            bool uns_p, bool high_p)
{
  enum machine_mode wmode = GET_MODE (dest);
  enum machine_mode mode = GET_MODE (op1);
  rtx t1, t2, t3, t4, mask;

  switch (mode)
    {
    case V4SImode:
      t1 = gen_reg_rtx (mode);
      t2 = gen_reg_rtx (mode);
      if (TARGET_XOP && !uns_p)
        {
          /* With XOP, we have pmacsdqh, aka mul_widen_odd.  In this case,
             shuffle the elements once so that all elements are in the right
             place for immediate use: { A C B D }.  */
          emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
                                        const1_rtx, GEN_INT (3)));
          emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
                                        const1_rtx, GEN_INT (3)));
        }
      else
        {
          /* Put the elements into place for the multiply.  */
          ix86_expand_vec_interleave (t1, op1, op1, high_p);
          ix86_expand_vec_interleave (t2, op2, op2, high_p);
          high_p = false;
        }

      ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
      break;

    case V8SImode:
      /* Shuffle the elements between the lanes.  After this we
         have { A B E F | C D G H } for each operand.  */
      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
                                      const0_rtx, const2_rtx,
                                      const1_rtx, GEN_INT (3)));
      emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
                                      const0_rtx, const2_rtx,
                                      const1_rtx, GEN_INT (3)));

      /* Shuffle the elements within the lanes.  After this we
         have { A A B B | C C D D } or { E E F F | G G H H }.  */
      t3 = gen_reg_rtx (V8SImode);
      t4 = gen_reg_rtx (V8SImode);
      mask = GEN_INT (high_p
                      ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
                      : 0 + (0 << 2) + (1 << 4) + (1 << 6));
      emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
      emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));

      ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
      break;

    case V8HImode:
    case V16HImode:
      t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
                         uns_p, OPTAB_DIRECT);
      t2 = expand_binop (mode,
                         uns_p ? umul_highpart_optab : smul_highpart_optab,
                         op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
      gcc_assert (t1 && t2);

      ix86_expand_vec_interleave (gen_lowpart (mode, dest), t1, t2, high_p);
      break;

    case V16QImode:
    case V32QImode:
      t1 = gen_reg_rtx (wmode);
      t2 = gen_reg_rtx (wmode);
      ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
      ix86_expand_sse_unpack (t2, op2, uns_p, high_p);

      emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
      break;

    default:
      gcc_unreachable ();
    }
}
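
/* Illustrative note (added for exposition, not part of the original file):
   the pshufd selector built in the V8SImode path above packs four 2-bit lane
   indices, lowest lane first.  For the high half,
   2 + (2 << 2) + (3 << 4) + (3 << 6) == 0xfa selects lanes { 2, 2, 3, 3 };
   for the low half, 0 + (0 << 2) + (1 << 4) + (1 << 6) == 0x50 selects lanes
   { 0, 0, 1, 1 }, producing the { A A B B | C C D D } layout that the
   even-lane widening multiply expects.  */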
void
ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
{
  rtx res_1, res_2;

  res_1 = gen_reg_rtx (V4SImode);
  res_2 = gen_reg_rtx (V4SImode);
  ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_1),
                                 op1, op2, true, false);
  ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_2),
                                 op1, op2, true, true);

  /* Move the results in element 2 down to element 1; we don't care
     what goes in elements 2 and 3.  Then we can merge the parts
     back together with an interleave.

     Note that two other sequences were tried:
     (1) Use interleaves at the start instead of psrldq, which allows
     us to use a single shufps to merge things back at the end.
     (2) Use shufps here to combine the two vectors, then pshufd to
     put the elements in the correct order.
     In both cases the cost of the reformatting stall was too high
     and the overall sequence slower.  */

  emit_insn (gen_sse2_pshufd_1 (res_1, res_1, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (res_2, res_2, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));

  set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
}
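
/* Illustrative note (added for exposition, not part of the original file):
   the sequence above relies on the fact that the low 32 bits of a 32x32
   multiply do not depend on signedness, so unsigned widening even/odd
   multiplies are sufficient.  PMULUDQ on the even lanes yields products 0
   and 2, PMULUDQ on the odd lanes (shifted down) yields products 1 and 3,
   and the final pshufd/punpckldq pair interleaves the four low halves back
   into lane order.  Scalar shape of the per-lane computation:  */
#if 0
// Hypothetical scalar model of one lane (for illustration only).
#include <stdint.h>
static uint32_t
mul_low32 (uint32_t a, uint32_t b)
{
  /* Keeping only the low 32 bits of the 64-bit product gives the same
     result for signed and unsigned interpretations.  */
  return (uint32_t) ((uint64_t) a * b);
}
#endif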
void
ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx t1, t2, t3, t4, t5, t6;

  if (TARGET_XOP && mode == V2DImode)
    {
      /* op1: A,B,C,D, op2: E,F,G,H */
      op1 = gen_lowpart (V4SImode, op1);
      op2 = gen_lowpart (V4SImode, op2);

      t1 = gen_reg_rtx (V4SImode);
      t2 = gen_reg_rtx (V4SImode);
      t3 = gen_reg_rtx (V2DImode);
      t4 = gen_reg_rtx (V2DImode);

      /* t1: B,A,D,C */
      emit_insn (gen_sse2_pshufd_1 (t1, op1,
                                    GEN_INT (1), GEN_INT (0),
                                    GEN_INT (3), GEN_INT (2)));

      /* t2: (B*E),(A*F),(D*G),(C*H) */
      emit_insn (gen_mulv4si3 (t2, t1, op2));

      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
      emit_insn (gen_xop_phadddq (t3, t2));

      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
      emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));

      /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
      emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
    }
  else
    {
      enum machine_mode nmode;
      rtx (*umul) (rtx, rtx, rtx);

      if (mode == V2DImode)
        {
          umul = gen_vec_widen_umult_even_v4si;
          nmode = V4SImode;
        }
      else if (mode == V4DImode)
        {
          umul = gen_vec_widen_umult_even_v8si;
          nmode = V8SImode;
        }
      else
        gcc_unreachable ();

      /* Multiply low parts.  */
      t1 = gen_reg_rtx (mode);
      emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));

      /* Shift input vectors right 32 bits so we can multiply high parts.  */
      t6 = GEN_INT (32);
      t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
      t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);

      /* Multiply high parts by low parts.  */
      t4 = gen_reg_rtx (mode);
      t5 = gen_reg_rtx (mode);
      emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
      emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));

      /* Combine and shift the highparts back.  */
      t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
      t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);

      /* Combine high and low parts.  */
      force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
    }

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
                       gen_rtx_MULT (mode, op1, op2));
}
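
/* Illustrative sketch (added for exposition, not part of the original file):
   the generic path above is the schoolbook low-part decomposition of a
   64x64 multiply built from unsigned 32x32->64 multiplies:

     a = ah * 2^32 + al,  b = bh * 2^32 + bl
     (a * b) mod 2^64 = al*bl + ((ah*bl + al*bh) << 32)

   t1 holds al*bl per lane, t4/t5 hold ah*bl and bh*al (the inputs shifted
   right by 32 so the high words land in the even lanes), and the final
   add/shift/add combines them.  */
#if 0
// Hypothetical scalar model of one vector lane.
#include <stdint.h>
static uint64_t
mul_low64 (uint64_t a, uint64_t b)
{
  uint64_t al = (uint32_t) a, ah = a >> 32;
  uint64_t bl = (uint32_t) b, bh = b >> 32;
  return al * bl + ((ah * bl + al * bh) << 32);
}
#endif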
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  if (GET_CODE (dst) == SUBREG)
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  if (GET_CODE (src) == SUBREG)
    src = SUBREG_REG (src);

  switch (GET_MODE (dst))
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      {
        enum machine_mode srcmode, dstmode;
        rtx (*pinsr)(rtx, rtx, rtx, rtx);

        srcmode = mode_for_size (size, MODE_INT, 0);

        switch (srcmode)
          {
          case QImode:
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V16QImode;
            pinsr = gen_sse4_1_pinsrb;
            break;

          case HImode:
            dstmode = V8HImode;
            pinsr = gen_sse2_pinsrw;
            break;

          case SImode:
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V4SImode;
            pinsr = gen_sse4_1_pinsrd;
            break;

          case DImode:
            gcc_assert (TARGET_64BIT);
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V2DImode;
            pinsr = gen_sse4_1_pinsrq;
            break;

          default:
            return false;
          }

        dst = gen_lowpart (dstmode, dst);
        src = gen_lowpart (srcmode, src);

        pos /= size;

        emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
        return true;
      }

    default:
      return false;
    }
}
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT && va_list_type_node != NULL_TREE)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return ms_va_list_type_node;
      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (!TARGET_64BIT)
    return 0;

  switch (idx)
    {
    default:
      break;

    case 0:
      *ptree = ms_va_list_type_node;
      *pname = "__builtin_ms_va_list";
      return 1;

    case 1:
      *ptree = sysv_va_list_type_node;
      *pname = "__builtin_sysv_va_list";
      return 1;
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder

/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */

/* Maximum number of immediate operands in a window.  */

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
enum dispatch_group
{
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};

/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of these kind of operations does not have any
   effect in dispatch window, but we need them for other reasons in
   the table.  */
static unsigned int num_allowable_groups[disp_last] = {
  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
};

char group_name[disp_last + 1][16] = {
  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
};

/* Instruction path.  */
enum insn_path
{
  no_path = 0,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi   /* Instructions with more than 2 micro ops.  */
};
/* sched_insn_info defines a window to the instructions scheduled in
   the basic block.  It contains a pointer to the insn_info table and
   the instruction scheduled.

   Windows are allocated for each basic block and are linked
   together.  */
typedef struct sched_insn_info_s
{
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;

/* Linked list of dispatch windows.  This is a two way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
typedef struct dispatch_windows_s
{
  int num_insn;            /* Number of insn in the window.  */
  int num_uops;            /* Number of uops in the window.  */
  int window_size;         /* Number of bytes in the window.  */
  int window_num;          /* Window number between 0 or 1.  */
  int num_imm;             /* Number of immediates in an insn.  */
  int num_imm_32;          /* Number of 32 bit immediates in an insn.  */
  int num_imm_64;          /* Number of 64 bit immediates in an insn.  */
  int imm_size;            /* Total immediates in the window.  */
  int num_loads;           /* Total memory loads in the window.  */
  int num_stores;          /* Total memory stores in the window.  */
  int violation;           /* Violation exists in window.  */
  sched_insn_info *window; /* Pointer to the window.  */
  struct dispatch_windows_s *next;
  struct dispatch_windows_s *prev;
} dispatch_windows;

/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;

static dispatch_windows *dispatch_window_list;
static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}

/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
          || type == TYPE_ICMP
          || type == TYPE_FCMP
          || GET_CODE (PATTERN (insn)) == COMPARE);
}

/* Return true if a dispatch violation encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}

/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}

/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}

/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}

/* This function is called when the end of a window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
                  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}

/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not dispatch
   windows whose sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
        init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
        (imm_values->imm32)++;
      else
        (imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
        {
          (imm_values->imm)++;
          (imm_values->imm32)++;
        }
      break;

    default:
      break;
    }

  return 0;
}

/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
                (rtx_function) find_constant_1, (void *) imm_values);
}

/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
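
/* Illustrative example (added for exposition, not part of the original file):
   with the accounting above, an instruction carrying one 32-bit and one
   64-bit immediate yields *imm == 2, *imm32 == 1, *imm64 == 1 and a returned
   imm_size of 1*4 + 1*8 == 12, which is then checked against the per-window
   limits defined earlier (MAX_IMM_32, MAX_IMM_64 and MAX_IMM_SIZE).  */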
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                               &num_imm64_operand);
  return false;
}

/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}
/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
          || num_imm_operand + window_list->num_imm > MAX_IMM
          || (num_imm32_operand > 0
              && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
                  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
          || (num_imm64_operand > 0
              && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
                  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
          || (window_list->imm_size + imm_size == MAX_IMM_SIZE
              && num_imm64_operand > 0
              && ((window_list->num_imm_64 > 0
                   && window_list->num_insn >= 2)
                  || window_list->num_insn >= 3)))
        return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
           || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
           || group == disp_prefetch)
          && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
          && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
          || (min_insn_size (insn) + sum) >= 48)
        /* Window 1 is full.  Go for next window.  */
        return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
         uops.  */
      if (path == path_double
          && (window_list->num_uops + 2) > MAX_INSN)
        return false;
      else if (path != path_single)
        return false;
    }
  return true;
}
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
           || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;
  bool insn_fits;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
          && insn_group == disp_branch)
        {
          process_end_window ();
          return;
        }
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
          || (byte_len + sum) >= 48)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block process.  */
      process_end_window ();
      return;
    }
}
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
           list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
           list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
        break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
               i, group_name[list->window[i].group],
               i, (void *) list->window[i].insn,
               i, list->window[i].path,
               i, list->window[i].byte_len,
               i, list->window[i].imm_bytes);
    }
}

/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
           group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}

/* Print to STDOUT the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}

/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((TARGET_BDVER1 || TARGET_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;

      case IS_DISPATCH_ON:
        return true;

      case IS_CMP:
        return is_cmp (insn);

      case DISPATCH_VIOLATION:
        return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.

   Currently parallel reassociation is enabled for Atom
   processors only and we set reassociation width to be 2
   because Atom may issue up to 2 instructions per cycle.

   Return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
                          enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}
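
/* Illustrative example (added for exposition, not part of the original file):
   with a reassociation width of 2, the reassoc pass may rewrite a serial
   chain such as ((a + b) + c) + d into (a + b) + (c + d), exposing two
   independent additions that a 2-issue core like Atom can execute in
   parallel; with width 1 the serial chain is kept.  For floating point the
   rewrite additionally requires permission to reassociate (-ffast-math or
   similar).  */
#if 0
// Hypothetical shape of the rewrite enabled by a width of 2.
static double
sum4 (double a, double b, double c, double d)
{
  return (a + b) + (c + d);   /* two independent additions */
}
#endif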
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
        return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}

/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
/* Implement targetm.vectorize.init_cost.  */

static void *
ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
{
  unsigned *cost = XNEWVEC (unsigned, 3);
  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
  return cost;
}
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                    struct _stmt_vec_info *stmt_info, int misalign,
                    enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
/* Implement targetm.vectorize.finish_cost.  */

static void
ix86_finish_cost (void *data, unsigned *prologue_cost,
                  unsigned *body_cost, unsigned *epilogue_cost)
{
  unsigned *cost = (unsigned *) data;
  *prologue_cost = cost[vect_prologue];
  *body_cost     = cost[vect_body];
  *epilogue_cost = cost[vect_epilogue];
}

/* Implement targetm.vectorize.destroy_cost_data.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}
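
/* Illustrative note (added for exposition, not part of the original file):
   the vectorizer drives these hooks as init_cost -> add_stmt_cost (once per
   statement and location) -> finish_cost -> destroy_cost_data.  For example,
   a body statement with count == 2 and a per-statement cost of 3 adds 6 to
   cost[vect_body] (scaled by 50 if it sits in an inner loop relative to the
   loop being vectorized), and finish_cost then hands the three accumulated
   sums back to the vectorizer for its profitability decision.  */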
/* Validate target specific memory model bits in VAL.  */

static unsigned HOST_WIDE_INT
ix86_memmodel_check (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
  unsigned HOST_WIDE_INT strong;

  if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
                                      |MEMMODEL_MASK)
      || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
    {
      warning (OPT_Winvalid_memory_model,
               "Unknown architecture specific memory model");
      return MEMMODEL_SEQ_CST;
    }
  strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
  if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
    {
      warning (OPT_Winvalid_memory_model,
               "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
    }
  if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
    {
      warning (OPT_Winvalid_memory_model,
               "HLE_RELEASE not used with RELEASE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
    }
  return val;
}
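
/* Illustrative usage sketch (added for exposition, not part of the original
   file): the HLE bits validated above are intended to be OR-ed into the
   __atomic memory-model argument by user code, e.g. (assuming the documented
   __ATOMIC_HLE_ACQUIRE / __ATOMIC_HLE_RELEASE macros):

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     ... critical section ...
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining HLE_ACQUIRE with a model weaker than ACQUIRE, or HLE_RELEASE
   with one weaker than RELEASE, triggers the warnings above and the bit is
   forced onto a SEQ_CST model.  */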
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"