/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if the state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
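/* Usage sketch (assumption: alloc_aux_for_blocks zero-initializes each
   bb->aux): once move_or_delete_vzeroupper below has called
   alloc_aux_for_blocks (sizeof (struct block_info_def)), every block's
   aux field points at a zeroed block_info_def, so for example
   BLOCK_INFO (bb)->state starts out as `unknown' (0) and the bool
   flags start out false.  */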
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
          && REG_P (SET_SRC (set))
          && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
        = (enum upper_128bits_state *) data;
      *state = used;
    }
}
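/* Illustrative sketch, not part of the original sources: the scanner
   below drives this callback through note_stores, which invokes it
   once per destination stored by an insn pattern:

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);
     if (state == used)
       ... a 256bit AVX register was written or copied ...

   STATE therefore flips to `used' as soon as any store writes, or
   copies from, a 256bit AVX register.  */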
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is the state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
                             enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
                 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
                 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
             bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
        {
          if (!vzeroupper_insn)
            continue;

          if (PREV_INSN (insn) != vzeroupper_insn)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Move vzeroupper after:\n");
                  print_rtl_single (dump_file, PREV_INSN (insn));
                  fprintf (dump_file, "before:\n");
                  print_rtl_single (dump_file, insn);
                }
              reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
                                  PREV_INSN (insn));
            }
          vzeroupper_insn = NULL_RTX;
          continue;
        }

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
          && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
          if (dump_file)
            {
              /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
        }
      else
        {
          /* Check insn for vzeroall intrinsic.  */
          if (GET_CODE (pat) == PARALLEL
              && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
              && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
              state = unused;
              unchanged = false;

              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
          else if (state != used)
            {
              note_stores (pat, check_avx256_stores, &state);
              if (state == used)
                unchanged = false;
            }
          continue;
        }

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
        {
          /* Since the upper 128bits are cleared, callee must not pass
             256bit AVX register.  We only need to check if callee
             returns 256bit AVX register.  */
          if (avx256 == callee_return_avx256)
            {
              state = used;
              unchanged = false;
            }

          /* Remove unnecessary vzeroupper since upper 128bits are
             cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else
        {
          /* Set state to UNUSED if callee doesn't return 256bit AVX
             register.  */
          if (avx256 != callee_return_pass_avx256)
            state = unused;

          if (avx256 == callee_return_pass_avx256
              || avx256 == callee_pass_avx256)
            {
              /* Must remove vzeroupper since callee passes in 256bit
                 AVX register.  */
              if (dump_file)
                {
                  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
                  print_rtl_single (dump_file, insn);
                }
              delete_insn (insn);
            }
          else
            {
              vzeroupper_insn = insn;
              unchanged = false;
            }
        }
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
             bb->index, unchanged ? "unchanged" : "changed",
             state);
}
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as USED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
             block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      switch (BLOCK_INFO (e->src)->state)
        {
        case unknown:
          if (!unknown_is_unused)
            seen_unknown = true;
          continue;
        case unused:
          break;
        case used:
          state = used;
          goto done;
        }
    }

  if (seen_unknown)
    state = unknown;

 done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
        cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
                                   cfun->machine->caller_pass_avx256_p
                                   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
        move_or_delete_vzeroupper_1 (bb, false);
        fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
        {
          bb = (basic_block) fibheap_extract_min (worklist);
          RESET_BIT (in_worklist, bb->index);
          gcc_assert (!TEST_BIT (visited, bb->index));
          if (!TEST_BIT (visited, bb->index))
            {
              SET_BIT (visited, bb->index);

              if (move_or_delete_vzeroupper_1 (bb, false))
                FOR_EACH_EDGE (e, ei, bb->succs)
                  {
                    if (e->dest == EXIT_BLOCK_PTR
                        || BLOCK_INFO (e->dest)->processed)
                      continue;

                    if (TEST_BIT (visited, e->dest->index))
                      {
                        if (!TEST_BIT (in_pending, e->dest->index))
                          {
                            /* Send E->DEST to next round.  */
                            SET_BIT (in_pending, e->dest->index);
                            fibheap_insert (pending,
                                            bb_order[e->dest->index],
                                            e->dest);
                          }
                      }
                    else if (!TEST_BIT (in_worklist, e->dest->index))
                      {
                        /* Add E->DEST to current round.  */
                        SET_BIT (in_worklist, e->dest->index);
                        fibheap_insert (worklist, bb_order[e->dest->index],
                                        e->dest);
                      }
                  }
            }
        }

      if (!cfun->machine->rescan_vzeroupper_p)
        break;
    }

  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);
  free (bb_order);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
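/* Sketch of the worklist discipline used above, spelled out for
   clarity (mirrors the code, not additional behavior): bb_order[] maps
   a block index to its position in the reverse completion order, and
   that position is the fibonacci heap key, so

     fibheap_insert (worklist, bb_order[bb->index], bb);
     bb = (basic_block) fibheap_extract_min (worklist);

   always hands back the pending block that comes earliest in the
   order.  Visiting predecessors before successors this way lets most
   blocks see their final predecessor states on the first pass, and
   blocks whose state changes again are deferred to the next round via
   the pending heap.  */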
static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
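/* For example, MODE_INDEX (SImode) == 2, so a lookup such as
   cost->mult_init[MODE_INDEX (SImode)] selects the SI entry of the
   five-element {QI, HI, SI, DI, other} arrays in the tables below
   (mult_init/divide being the array fields of processor_costs).  */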
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
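/* Worked example of the size metric (assuming COSTS_N_INSNS (N) is
   (N) * 4 as stated above): COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1),
   i.e. a 2-byte addition is weighted exactly like one generic insn, so
   the size-tuning table below stays comparable with the speed tables.  */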
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
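/* Shape of the stringop_algs initializers used throughout the tables
   below (field names per the i386 stringop_algs definition: an
   `unknown_size' algorithm followed by {max, alg} pairs, with -1
   closing the list).  For instance

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   reads: blocks known to be at most 256 bytes use "rep movsl", and
   anything larger, or of unknown size, goes through the library call.
   DUMMY_STRINGOP_ALGS fills the 64bit slot of tables tuned only for
   32bit operation.  */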
static const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),            /* cost of an add instruction */
  COSTS_N_BYTES (3),            /* cost of a lea instruction */
  COSTS_N_BYTES (2),            /* variable shift costs */
  COSTS_N_BYTES (3),            /* constant shift costs */
  {COSTS_N_BYTES (3),           /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),           /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  COSTS_N_BYTES (3),            /* cost of movsx */
  COSTS_N_BYTES (3),            /* cost of movzx */
  0,                            /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {2, 2, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 2},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {2, 2, 2},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  3,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {3, 3},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  3,                            /* cost of moving SSE register */
  {3, 3, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {3, 3, 3},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_BYTES (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),            /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),            /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),            /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  1,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  1,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (6),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),           /* HI */
   COSTS_N_INSNS (6),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  COSTS_N_INSNS (1),            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (23),          /* SI */
   COSTS_N_INSNS (23),          /* DI */
   COSTS_N_INSNS (23)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (23),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),          /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (12),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),          /* HI */
   COSTS_N_INSNS (12),          /* SI */
   COSTS_N_INSNS (12),          /* DI */
   COSTS_N_INSNS (12)},         /* other */
  1,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),          /* HI */
   COSTS_N_INSNS (40),          /* SI */
   COSTS_N_INSNS (40),          /* DI */
   COSTS_N_INSNS (40)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  4,                            /* size of l1 cache.  486 has 8kB cache
                                   shared for code and data, so 4kB is
                                   not really precise.  */
  4,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),           /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (11),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),          /* HI */
   COSTS_N_INSNS (11),          /* SI */
   COSTS_N_INSNS (11),          /* DI */
   COSTS_N_INSNS (11)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),          /* HI */
   COSTS_N_INSNS (25),          /* SI */
   COSTS_N_INSNS (25),          /* DI */
   COSTS_N_INSNS (25)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  8,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  8,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (4)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),          /* HI */
   COSTS_N_INSNS (17),          /* SI */
   COSTS_N_INSNS (17),          /* DI */
   COSTS_N_INSNS (17)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache */
  32,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb has apparently more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (2),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (7),           /* SI */
   COSTS_N_INSNS (7),           /* DI */
   COSTS_N_INSNS (7)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (39),          /* SI */
   COSTS_N_INSNS (39),          /* DI */
   COSTS_N_INSNS (39)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  1,                            /* cost for loading QImode using movzbl */
  {1, 1, 1},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {1, 1, 1},                    /* cost of storing integer registers */
  1,                            /* cost of reg,reg fld/fst */
  {1, 1, 1},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 6, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  1,                            /* cost of moving MMX register */
  {1, 1},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {1, 1},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  1,                            /* cost of moving SSE register */
  {1, 1, 1},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {1, 1, 1},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  1,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  128,                          /* size of l2 cache.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (3),           /* DI */
   COSTS_N_INSNS (3)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),          /* HI */
   COSTS_N_INSNS (18),          /* SI */
   COSTS_N_INSNS (18),          /* DI */
   COSTS_N_INSNS (18)},         /* other */
  COSTS_N_INSNS (2),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  3,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {6, 6, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  6,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  32,                           /* size of l2 cache.  Some models
                                   have integrated l2 cache, but
                                   optimizing for k6 is not important
                                   enough to worry about that.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  COSTS_N_INSNS (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (5),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),           /* HI */
   COSTS_N_INSNS (5),           /* SI */
   COSTS_N_INSNS (5),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  5,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) compared to K8.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 3, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  5,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  3,                            /* vec_unalign_load_cost.  */
  3,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  2,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */

  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  32,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (3),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (4),            /* constant shift costs */
  {COSTS_N_INSNS (15),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),          /* HI */
   COSTS_N_INSNS (15),          /* SI */
   COSTS_N_INSNS (15),          /* DI */
   COSTS_N_INSNS (15)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),          /* HI */
   COSTS_N_INSNS (56),          /* SI */
   COSTS_N_INSNS (56),          /* DI */
   COSTS_N_INSNS (56)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  12,                           /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  10,                           /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (5),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (10),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),          /* HI */
   COSTS_N_INSNS (10),          /* SI */
   COSTS_N_INSNS (10),          /* DI */
   COSTS_N_INSNS (10)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),          /* HI */
   COSTS_N_INSNS (66),          /* SI */
   COSTS_N_INSNS (66),          /* DI */
   COSTS_N_INSNS (66)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  3,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  6,                            /* cost of moving MMX register */
  {12, 12},                     /* cost of loading MMX registers
                                   in SImode and DImode */
  {12, 12},                     /* cost of storing MMX registers
                                   in SImode and DImode */
  6,                            /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {12, 12, 12},                 /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  8,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  1024,                         /* size of l2 cache.  */
  128,                          /* size of prefetch block */
  8,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (2)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {8, 8, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {8, 8, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),           /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */

static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),         /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,     /* cost of a lea instruction */
  COSTS_N_INSNS (1),         /* variable shift costs */
  COSTS_N_INSNS (1),         /* constant shift costs */
  {COSTS_N_INSNS (3),        /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),        /* HI */
   COSTS_N_INSNS (3),        /* SI */
   COSTS_N_INSNS (4),        /* DI */
   COSTS_N_INSNS (2)},       /* other */
  0,                         /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),       /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),       /* HI */
   COSTS_N_INSNS (42),       /* SI */
   COSTS_N_INSNS (74),       /* DI */
   COSTS_N_INSNS (74)},      /* other */
  COSTS_N_INSNS (1),         /* cost of movsx */
  COSTS_N_INSNS (1),         /* cost of movzx */
  8,                         /* "large" insn */
  17,                        /* MOVE_RATIO */
  4,                         /* cost for loading QImode using movzbl */
  {4, 4, 4},                 /* cost of loading integer registers
                                in QImode, HImode and SImode.
                                Relative to reg-reg move (2).  */
  {4, 4, 4},                 /* cost of storing integer registers */
  4,                         /* cost of reg,reg fld/fst */
  {12, 12, 12},              /* cost of loading fp registers
                                in SFmode, DFmode and XFmode */
  {6, 6, 8},                 /* cost of storing fp registers
                                in SFmode, DFmode and XFmode */
  2,                         /* cost of moving MMX register */
  {8, 8},                    /* cost of loading MMX registers
                                in SImode and DImode */
  {8, 8},                    /* cost of storing MMX registers
                                in SImode and DImode */
  2,                         /* cost of moving SSE register */
  {8, 8, 8},                 /* cost of loading SSE registers
                                in SImode, DImode and TImode */
  {8, 8, 8},                 /* cost of storing SSE registers
                                in SImode, DImode and TImode */
  5,                         /* MMX or SSE register to integer */
  32,                        /* size of l1 cache.  */
  512,                       /* size of l2 cache.  */
  64,                        /* size of prefetch block */
  6,                         /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to a perhaps more appropriate value of 5.  */
  3,                         /* Branch cost */
  COSTS_N_INSNS (8),         /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),         /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),        /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),         /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),         /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),        /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                         /* scalar_stmt_cost.  */
  1,                         /* scalar load_cost.  */
  1,                         /* scalar_store_cost.  */
  1,                         /* vec_stmt_cost.  */
  1,                         /* vec_to_scalar_cost.  */
  1,                         /* scalar_to_vec_cost.  */
  1,                         /* vec_align_load_cost.  */
  2,                         /* vec_unalign_load_cost.  */
  1,                         /* vec_store_cost.  */
  3,                         /* cond_taken_branch_cost.  */
  1,                         /* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */

static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),         /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,     /* cost of a lea instruction */
  COSTS_N_INSNS (1),         /* variable shift costs */
  COSTS_N_INSNS (1),         /* constant shift costs */
  {COSTS_N_INSNS (3),        /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),        /* HI */
   COSTS_N_INSNS (3),        /* SI */
   COSTS_N_INSNS (4),        /* DI */
   COSTS_N_INSNS (2)},       /* other */
  0,                         /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),       /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),       /* HI */
   COSTS_N_INSNS (42),       /* SI */
   COSTS_N_INSNS (74),       /* DI */
   COSTS_N_INSNS (74)},      /* other */
  COSTS_N_INSNS (1),         /* cost of movsx */
  COSTS_N_INSNS (1),         /* cost of movzx */
  8,                         /* "large" insn */
  17,                        /* MOVE_RATIO */
  4,                         /* cost for loading QImode using movzbl */
  {4, 4, 4},                 /* cost of loading integer registers
                                in QImode, HImode and SImode.
                                Relative to reg-reg move (2).  */
  {4, 4, 4},                 /* cost of storing integer registers */
  4,                         /* cost of reg,reg fld/fst */
  {12, 12, 12},              /* cost of loading fp registers
                                in SFmode, DFmode and XFmode */
  {6, 6, 8},                 /* cost of storing fp registers
                                in SFmode, DFmode and XFmode */
  2,                         /* cost of moving MMX register */
  {8, 8},                    /* cost of loading MMX registers
                                in SImode and DImode */
  {8, 8},                    /* cost of storing MMX registers
                                in SImode and DImode */
  2,                         /* cost of moving SSE register */
  {8, 8, 8},                 /* cost of loading SSE registers
                                in SImode, DImode and TImode */
  {8, 8, 8},                 /* cost of storing SSE registers
                                in SImode, DImode and TImode */
  5,                         /* MMX or SSE register to integer */
  32,                        /* size of l1 cache.  */
  256,                       /* size of l2 cache.  */
  64,                        /* size of prefetch block */
  6,                         /* number of parallel prefetches */
  3,                         /* Branch cost */
  COSTS_N_INSNS (8),         /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),         /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),        /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),         /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),         /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),        /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                         /* scalar_stmt_cost.  */
  1,                         /* scalar load_cost.  */
  1,                         /* scalar_store_cost.  */
  1,                         /* vec_stmt_cost.  */
  1,                         /* vec_to_scalar_cost.  */
  1,                         /* scalar_to_vec_cost.  */
  1,                         /* vec_align_load_cost.  */
  2,                         /* vec_unalign_load_cost.  */
  1,                         /* vec_store_cost.  */
  3,                         /* cond_taken_branch_cost.  */
  1,                         /* cond_not_taken_branch_cost.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
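/* Illustrative sketch (not part of the original file): each memcpy/memset
   entry in the cost tables above is a stringop_algs list of {max, alg}
   pairs ordered by size and terminated by max == -1.  A chooser in the
   spirit of this file's decide_alg walks the list until the requested
   byte count fits; this simplified helper and its name are hypothetical,
   assuming the stringop_algs layout from i386.h.  */
static enum stringop_alg
sketch_choose_stringop_alg (const struct stringop_algs *algs,
                            HOST_WIDE_INT count)
{
  unsigned int i;
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || count <= algs->size[i].max)
      return algs->size[i].alg;
  return libcall;  /* Ran off the table; fall back to a library call.  */
}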
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
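/* Illustrative sketch (not part of the original file): the m_* masks above
   are one-bit-per-processor sets, and the tuning entries below are unions
   of them.  Testing whether the active processor belongs to such a set is
   a simple bit test; the helper name is hypothetical.  */
static inline bool
sketch_processor_in_set (enum processor_type proc, unsigned int mask_set)
{
  return ((1U << proc) & mask_set) != 0;
}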
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling for Generic64 seems like good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation result.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */
  0,

  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8
  | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on Generic32 compilation setting as well.  However
     in current implementation the partial register stalls are not eliminated
     very well - they can be introduced via subregs synthesized by combine
     and can happen in caller/callee saving sequences.  Because this option
     pays back little on PPro based chips and is in conflict with partial reg
     dependencies used by Athlon/P4 based chips, it is better to leave it off
     for generic32 for now.  */
  m_PPRO,

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2I7 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */
  m_K6,

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
  m_PENT4,

  /* X86_TUNE_SPLIT_LONG_MOVES */
  m_PPRO,

  /* X86_TUNE_READ_MODIFY_WRITE */
  ~m_PENT,

  /* X86_TUNE_READ_MODIFY */
  ~(m_PENT | m_PPRO),

  /* X86_TUNE_PROMOTE_QIMODE */
  m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_386 | m_486 | m_PENT),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_P4_NOCONA,

  /* X86_TUNE_QIMODE_MATH */
  ~0,

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */
  ~m_PPRO,

  /* X86_TUNE_PROMOTE_QI_REGS */
  0,

  /* X86_TUNE_PROMOTE_HI_REGS */
  m_PPRO,

  /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
     over esp addition.  */
  m_386 | m_486 | m_PENT | m_PPRO,

  /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
     over esp addition.  */
  m_PENT,

  /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
     over esp subtraction.  */
  m_386 | m_486 | m_PENT | m_K6_GEODE,

  /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
     over esp subtraction.  */
  m_PENT | m_K6_GEODE,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
    | m_GENERIC),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here in between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units versus K8 based chips that divide SSE
     registers to two 64bit halves.  This knob promotes all store destinations
     to be 128bit to allow register renaming on 128bit SSE units, but usually
     results in one extra microop on 64bit SSE units.  Experimental results
     show that disabling this option on P4 brings over 20% SPECfp regression,
     while enabling it on K8 brings roughly 2.4% regression that can be partly
     masked by careful scheduling of moves.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER
  | m_GENERIC,

  /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
  m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,

  /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
  m_COREI7 | m_BDVER,

  /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
  m_BDVER,

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just lower part of scalar values in proper format leaving the
     upper part undefined.  */
  m_ATHLON_K8,

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_AMD_MULTIPLE,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_P4_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */
  ~m_486,

  /* X86_TUNE_USE_FFREEP */
  m_AMD_MULTIPLE,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
  ~(m_AMDFAM10 | m_BDVER),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_USE_INCDEC */
  ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function.  */
  m_ATOM,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8
  | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */
  ~m_K8,

  /* X86_TUNE_AVOID_VECTOR_DECODE */
  m_CORE2I7_64 | m_K8 | m_GENERIC64,

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
     and SImode multiply, but 386 and 486 do HImode multiply faster.  */
  ~(m_386 | m_486),

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
     vector path on AMD machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,

  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
     machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a MOV.  */
  m_PENT,

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */
  m_PENT,

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */
  m_K6,

  /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
     from FP to FP.  */
  m_CORE2I7 | m_AMDFAM10 | m_GENERIC,

  /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
     from integer to FP.  */
  m_AMDFAM10,

  /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
     with a subsequent conditional jump instruction into a single
     compare-and-branch uop.  */
  m_BDVER,

  /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit.  This flag
     will impact LEA instruction selection.  */
  m_ATOM,

  /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
     instructions.  */
  ~m_ATOM,

  /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
     at -O3.  For the moment, the prefetching seems badly tuned for Intel
     chips.  */
  m_K6_GEODE | m_AMD_MULTIPLE,

  /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
     the auto-vectorizer.  */
  m_BDVER,

  /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
     during reassociation of integer computation.  */
  m_ATOM,

  /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
     during reassociation of fp computation.  */
  m_ATOM,
};
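/* Illustrative sketch (not part of the original file): at option-override
   time the mask unions above are folded into the per-feature byte array,
   mirroring the loop over initial_ix86_arch_features that appears later in
   this file.  The helper name is hypothetical.  */
static void
sketch_init_tune_features (enum processor_type tune)
{
  unsigned int tune_mask = 1u << tune;
  int i;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & tune_mask);
}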
/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};
static const unsigned int x86_accumulate_outgoing_args
  = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE
    | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_load
  = m_COREI7 | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_store
  = m_COREI7 | m_BDVER | m_GENERIC;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
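/* Illustrative sketch (not part of the original file): the constant is
   meant to be compared against a per-function insn estimate, roughly in
   the shape below; the helper and its argument are hypothetical.  */
static inline bool
sketch_prefer_fast_prologue (int estimated_insns)
{
  return estimated_insns < FAST_PROLOGUE_INSN_COUNT;
}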
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
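/* Illustrative note (not part of the original file): REGNO_REG_CLASS in
   i386.h indexes this table directly, so e.g. regclass_map[0] == AREG
   says that hard register 0 (%ax/%eax/%rax) is most tightly described by
   class AREG.  */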
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,          /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,  /* fp regs */
  -1, -1, -1, -1, -1,              /* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,  /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,  /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended SSE registers */
};
/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,          /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,  /* fp regs */
  -1, -1, -1, -1, -1,              /* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,  /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,  /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,    /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,  /* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,          /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,  /* fp regs */
  -1, 9, -1, -1, -1,               /* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,  /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,  /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended SSE registers */
};
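/* Illustrative note (not part of the original file): debug output maps a
   gcc hard register number through these tables; e.g. gcc regno 4 (%esi)
   becomes DWARF register 6 under the SVR4 numbering, matching
   svr4_dbx_register_map[4] == 6 above.  */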
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					<- ARG_POINTER
   saved pc

   saved static chain			if ix86_static_chain_on_stack

   saved frame pointer			if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]
					<- regs_save_offset
   [padding0]

   [saved SSE regs]
					<- sse_regs_save_offset
   [padding1]          |
		       |		<- FRAME_POINTER
   [va_arg registers]  |
		       |
   [frame]	       |
		       |
   [padding2]	       | = to_allocate
					<- STACK_POINTER
  */
struct ix86_frame
{
  int nsseregs;
  int nregs;
  int va_arg_size;
  int red_zone_size;
  int outgoing_arguments_size;
  HOST_WIDE_INT frame;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  HOST_WIDE_INT hfp_save_offset;
  HOST_WIDE_INT reg_save_offset;
  HOST_WIDE_INT sse_reg_save_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 4
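/* Illustrative note (not part of the original file): classification works
   per 8-byte chunk ("eightbyte") of an argument.  For instance, a
   struct { double a; double b; } occupies two eightbytes, each of which
   classifies as an SSE-style class (SSEDF here), so the struct is passed
   in two SSE registers; a struct { long a; long b; } classifies as two
   X86_64_INTEGER_CLASS chunks and goes in two integer registers.  */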
/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
static bool ext_80387_constants_init = 0;
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (enum machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
						 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};

static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
				 const char *, enum fpmath_unit, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
  const struct processor_costs *cost;  /* Processor costs */
  const int align_loop;                /* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};

static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {&i386_cost, 4, 3, 4, 3, 4},
  {&i486_cost, 16, 15, 16, 15, 16},
  {&pentium_cost, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 16, 15, 16, 10, 16},
  {&geode_cost, 0, 0, 0, 0, 0},
  {&k6_cost, 32, 7, 32, 7, 32},
  {&athlon_cost, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0},
  {&k8_cost, 16, 7, 16, 7, 16},
  {&nocona_cost, 0, 0, 0, 0, 0},
  /* Core 2 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core 2 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  /* Core i7 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core i7 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&generic32_cost, 16, 7, 16, 7, 16},
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&amdfam10_cost, 32, 24, 32, 7, 32},
  {&bdver1_cost, 32, 24, 32, 7, 32},
  {&bdver2_cost, 32, 24, 32, 7, 32},
  {&btver1_cost, 32, 24, 32, 7, 32},
  {&atom_cost, 16, 15, 16, 7, 16}
};
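/* Illustrative sketch (not part of the original file): the tuning-dependent
   alignments are read out of this table by processor index, along these
   lines; the helper name is hypothetical.  */
static inline int
sketch_align_loop_for (enum processor_type proc)
{
  return processor_target_table[proc].align_loop;
}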
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
{
  /* ... */
};
/* Return true if a red-zone is in use.  */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */

static char *
ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
		    const char *tune, enum fpmath_unit fpmath,
		    bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option;		/* option string */
    HOST_WIDE_INT mask;		/* isa mask options */
  };

  /* This table is ordered so that options like -msse4.2 that imply
     preceding options will match those first.  */
  static struct ix86_target_opts isa_opts[] =
  {
    { "-m64",		OPTION_MASK_ISA_64BIT },
    { "-mfma4",		OPTION_MASK_ISA_FMA4 },
    { "-mfma",		OPTION_MASK_ISA_FMA },
    { "-mxop",		OPTION_MASK_ISA_XOP },
    { "-mlwp",		OPTION_MASK_ISA_LWP },
    { "-msse4a",	OPTION_MASK_ISA_SSE4A },
    { "-msse4.2",	OPTION_MASK_ISA_SSE4_2 },
    { "-msse4.1",	OPTION_MASK_ISA_SSE4_1 },
    { "-mssse3",	OPTION_MASK_ISA_SSSE3 },
    { "-msse3",		OPTION_MASK_ISA_SSE3 },
    { "-msse2",		OPTION_MASK_ISA_SSE2 },
    { "-msse",		OPTION_MASK_ISA_SSE },
    { "-m3dnow",	OPTION_MASK_ISA_3DNOW },
    { "-m3dnowa",	OPTION_MASK_ISA_3DNOW_A },
    { "-mmmx",		OPTION_MASK_ISA_MMX },
    { "-mabm",		OPTION_MASK_ISA_ABM },
    { "-mbmi",		OPTION_MASK_ISA_BMI },
    { "-mbmi2",		OPTION_MASK_ISA_BMI2 },
    { "-mlzcnt",	OPTION_MASK_ISA_LZCNT },
    { "-mtbm",		OPTION_MASK_ISA_TBM },
    { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
    { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
    { "-mcrc32",	OPTION_MASK_ISA_CRC32 },
    { "-maes",		OPTION_MASK_ISA_AES },
    { "-mpclmul",	OPTION_MASK_ISA_PCLMUL },
    { "-mfsgsbase",	OPTION_MASK_ISA_FSGSBASE },
    { "-mrdrnd",	OPTION_MASK_ISA_RDRND },
    { "-mf16c",		OPTION_MASK_ISA_F16C },
    { "-mrtm",		OPTION_MASK_ISA_RTM },
  };

  /* Flag options.  */
  static struct ix86_target_opts flag_opts[] =
  {
    { "-m128bit-long-double",		MASK_128BIT_LONG_DOUBLE },
    { "-m80387",			MASK_80387 },
    { "-maccumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS },
    { "-malign-double",			MASK_ALIGN_DOUBLE },
    { "-mcld",				MASK_CLD },
    { "-mfp-ret-in-387",		MASK_FLOAT_RETURNS },
    { "-mieee-fp",			MASK_IEEE_FP },
    { "-minline-all-stringops",		MASK_INLINE_ALL_STRINGOPS },
    { "-minline-stringops-dynamically",	MASK_INLINE_STRINGOPS_DYNAMICALLY },
    { "-mms-bitfields",			MASK_MS_BITFIELD_LAYOUT },
    { "-mno-align-stringops",		MASK_NO_ALIGN_STRINGOPS },
    { "-mno-fancy-math-387",		MASK_NO_FANCY_MATH_387 },
    { "-mno-push-args",			MASK_NO_PUSH_ARGS },
    { "-mno-red-zone",			MASK_NO_RED_ZONE },
    { "-momit-leaf-frame-pointer",	MASK_OMIT_LEAF_FRAME_POINTER },
    { "-mrecip",			MASK_RECIP },
    { "-mrtd",				MASK_RTD },
    { "-msseregparm",			MASK_SSEREGPARM },
    { "-mstack-arg-probe",		MASK_STACK_PROBE },
    { "-mtls-direct-seg-refs",		MASK_TLS_DIRECT_SEG_REFS },
    { "-mvect8-ret-in-mem",		MASK_VECT8_RETURNS },
    { "-m8bit-idiv",			MASK_USE_8BIT_IDIV },
    { "-mvzeroupper",			MASK_VZEROUPPER },
    { "-mavx256-split-unaligned-load",	MASK_AVX256_SPLIT_UNALIGNED_LOAD },
    { "-mavx256-split-unaligned-store",	MASK_AVX256_SPLIT_UNALIGNED_STORE },
    { "-mprefer-avx128",		MASK_PREFER_AVX128 },
  };

  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];

  char isa_other[40];
  char target_other[40];
  unsigned num = 0;
  unsigned i, j;
  char *ret;
  char *ptr;
  size_t len;
  size_t line_len;
  size_t sep_len;

  memset (opts, '\0', sizeof (opts));

  /* Add -march= option.  */
  if (arch)
    {
      opts[num][0] = "-march=";
      opts[num++][1] = arch;
    }

  /* Add -mtune= option.  */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }

  /* Pick out the options in isa options.  */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    {
      if ((isa & isa_opts[i].mask) != 0)
	{
	  opts[num++][0] = isa_opts[i].option;
	  isa &= ~ isa_opts[i].mask;
	}
    }

  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
	       isa);
    }

  /* Add flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    {
      if ((flags & flag_opts[i].mask) != 0)
	{
	  opts[num++][0] = flag_opts[i].option;
	  flags &= ~ flag_opts[i].mask;
	}
    }

  if (flags && add_nl_p)
    {
      opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: %#x)", flags);
    }

  /* Add -fpmath= option.  */
  if (fpmath)
    {
      opts[num][0] = "-mfpmath=";
      switch ((int) fpmath)
	{
	case FPMATH_387:
	  opts[num++][1] = "387";
	  break;

	case FPMATH_SSE:
	  opts[num++][1] = "sse";
	  break;

	case FPMATH_387 | FPMATH_SSE:
	  opts[num++][1] = "sse+387";
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Any options?  */
  if (num == 0)
    return NULL;

  gcc_assert (num < ARRAY_SIZE (opts));

  /* Size the string.  */
  len = 0;
  sep_len = (add_nl_p) ? 3 : 1;
  for (i = 0; i < num; i++)
    {
      len += sep_len;

      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  len += strlen (opts[i][j]);
    }

  /* Build the string.  */
  ret = ptr = (char *) xmalloc (len);
  line_len = 0;

  for (i = 0; i < num; i++)
    {
      size_t len2[2];

      for (j = 0; j < 2; j++)
	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;

      if (i != 0)
	{
	  *ptr++ = ' ';
	  line_len++;

	  if (add_nl_p && line_len + len2[0] + len2[1] > 70)
	    {
	      *ptr++ = '\\';
	      *ptr++ = '\n';
	      line_len = 0;
	    }
	}

      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  {
	    memcpy (ptr, opts[i][j], len2[j]);
	    ptr += len2[j];
	    line_len += len2[j];
	  }
    }

  *ptr = '\0';
  gcc_assert (ret + len >= ptr);

  return ret;
}
/* Return true if profiling code should be emitted before the prologue;
   otherwise return false.  Note: for x86 with "hotfix" this case is
   sorried.  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Function that is callable from the debugger to print the current
   options.  */
void
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);
}
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attributes.  */

static void
ix86_option_override_internal (bool main_args_p)
{
  int i;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  const bool ix86_tune_specified = (ix86_tune_string != NULL);
  const char *prefix;
  const char *suffix;
  const char *sw;
#define PTA_3DNOW		(HOST_WIDE_INT_1 << 0)
#define PTA_3DNOW_A		(HOST_WIDE_INT_1 << 1)
#define PTA_64BIT		(HOST_WIDE_INT_1 << 2)
#define PTA_ABM			(HOST_WIDE_INT_1 << 3)
#define PTA_AES			(HOST_WIDE_INT_1 << 4)
#define PTA_AVX			(HOST_WIDE_INT_1 << 5)
#define PTA_BMI			(HOST_WIDE_INT_1 << 6)
#define PTA_CX16		(HOST_WIDE_INT_1 << 7)
#define PTA_F16C		(HOST_WIDE_INT_1 << 8)
#define PTA_FMA			(HOST_WIDE_INT_1 << 9)
#define PTA_FMA4		(HOST_WIDE_INT_1 << 10)
#define PTA_FSGSBASE		(HOST_WIDE_INT_1 << 11)
#define PTA_LWP			(HOST_WIDE_INT_1 << 12)
#define PTA_LZCNT		(HOST_WIDE_INT_1 << 13)
#define PTA_MMX			(HOST_WIDE_INT_1 << 14)
#define PTA_MOVBE		(HOST_WIDE_INT_1 << 15)
#define PTA_NO_SAHF		(HOST_WIDE_INT_1 << 16)
#define PTA_PCLMUL		(HOST_WIDE_INT_1 << 17)
#define PTA_POPCNT		(HOST_WIDE_INT_1 << 18)
#define PTA_PREFETCH_SSE	(HOST_WIDE_INT_1 << 19)
#define PTA_RDRND		(HOST_WIDE_INT_1 << 20)
#define PTA_SSE			(HOST_WIDE_INT_1 << 21)
#define PTA_SSE2		(HOST_WIDE_INT_1 << 22)
#define PTA_SSE3		(HOST_WIDE_INT_1 << 23)
#define PTA_SSE4_1		(HOST_WIDE_INT_1 << 24)
#define PTA_SSE4_2		(HOST_WIDE_INT_1 << 25)
#define PTA_SSE4A		(HOST_WIDE_INT_1 << 26)
#define PTA_SSSE3		(HOST_WIDE_INT_1 << 27)
#define PTA_TBM			(HOST_WIDE_INT_1 << 28)
#define PTA_XOP			(HOST_WIDE_INT_1 << 29)
#define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
#define PTA_BMI2		(HOST_WIDE_INT_1 << 31)
#define PTA_RTM			(HOST_WIDE_INT_1 << 32)
/* if this reaches 64, need to widen struct pta flags below */
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned HOST_WIDE_INT flags;
    }
  const processor_alias_table[] =
    {
2948 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2949 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2950 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2951 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2952 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2953 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2954 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2955 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2956 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2957 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2958 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2959 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
},
2960 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2962 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2964 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2965 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2966 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
2967 PTA_MMX
|PTA_SSE
| PTA_SSE2
},
2968 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
2969 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2970 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
2971 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
2972 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
2973 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2974 | PTA_CX16
| PTA_NO_SAHF
},
2975 {"core2", PROCESSOR_CORE2_64
, CPU_CORE2
,
2976 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2977 | PTA_SSSE3
| PTA_CX16
},
2978 {"corei7", PROCESSOR_COREI7_64
, CPU_COREI7
,
2979 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2980 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
},
2981 {"corei7-avx", PROCESSOR_COREI7_64
, CPU_COREI7
,
2982 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2983 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2984 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
},
2985 {"core-avx-i", PROCESSOR_COREI7_64
, CPU_COREI7
,
2986 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2987 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2988 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2989 | PTA_RDRND
| PTA_F16C
},
2990 {"core-avx2", PROCESSOR_COREI7_64
, CPU_COREI7
,
2991 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2992 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
2993 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2994 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
2995 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
},
2996 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
2997 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2998 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
},
2999 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3000 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
|PTA_PREFETCH_SSE
},
3001 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3002 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3003 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3004 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3005 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3006 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3007 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3008 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3009 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3010 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3011 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3012 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3013 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3014 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3015 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
3016 {"k8", PROCESSOR_K8
, CPU_K8
,
3017 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3018 | PTA_SSE2
| PTA_NO_SAHF
},
3019 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3020 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3021 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3022 {"opteron", PROCESSOR_K8
, CPU_K8
,
3023 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3024 | PTA_SSE2
| PTA_NO_SAHF
},
3025 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3026 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3027 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3028 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3029 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3030 | PTA_SSE2
| PTA_NO_SAHF
},
3031 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3032 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3033 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3034 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3035 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3036 | PTA_SSE2
| PTA_NO_SAHF
},
3037 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3038 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3039 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3040 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3041 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3042 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3043 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3044 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3045 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3046 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3047 | PTA_XOP
| PTA_LWP
},
3048 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3049 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3050 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3051 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3052 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3054 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
3055 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3056 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
},
3057 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3058 0 /* flags are only used for -march switch. */ },
3059 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3060 PTA_64BIT
/* flags are only used for -march switch. */ },
  /* -mrecip options.  */
  static struct
    {
      const char *string;	/* option name */
      unsigned int mask;	/* mask bits to set */
    }
  const recip_options[] =
    {
      { "all",       RECIP_MASK_ALL },
      { "none",      RECIP_MASK_NONE },
      { "div",       RECIP_MASK_DIV },
      { "sqrt",      RECIP_MASK_SQRT },
      { "vec-div",   RECIP_MASK_VEC_DIV },
      { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
  /* Set up prefix/suffix so the error messages refer to either the command
     line argument, or the attribute(target).  */
  if (main_args_p)
    {
      prefix = "-m";
      suffix = "";
      sw = "switch";
    }
  else
    {
      prefix = "option(\"";
      suffix = "\")";
      sw = "attribute";
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
  if (TARGET_X32)
    ix86_isa_flags |= OPTION_MASK_ISA_64BIT;

  /* -fPIC is the default for x86_64.  */
  if (TARGET_MACHO && TARGET_64BIT)
    flag_pic = 2;
  /* Need to check -mtune=generic first.  */
  if (ix86_tune_string)
    {
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "i686")
	  /* As special support for cross compilers we read -mtune=native
	     as -mtune=generic.  With native compilers we won't see the
	     -mtune=native, as it was changed by the driver.  */
	  || !strcmp (ix86_tune_string, "native"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
      /* If this call is for setting the option attribute, allow the
	 generic32/generic64 that was previously set.  */
      else if (!main_args_p
	       && (!strcmp (ix86_tune_string, "generic32")
		   || !strcmp (ix86_tune_string, "generic64")))
	;
      else if (!strncmp (ix86_tune_string, "generic", 7))
	error ("bad value (%s) for %stune=%s %s",
	       ix86_tune_string, prefix, suffix, sw);
      else if (!strcmp (ix86_tune_string, "x86-64"))
	warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
		 "%stune=k8%s or %stune=generic%s instead as appropriate",
		 prefix, suffix, prefix, suffix, prefix, suffix);
    }
  else
    {
      if (ix86_arch_string)
	ix86_tune_string = ix86_arch_string;
      if (!ix86_tune_string)
	{
	  ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
	  ix86_tune_defaulted = 1;
	}

      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
	 need to use a sensible tune option.  */
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "x86-64")
	  || !strcmp (ix86_tune_string, "i686"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
    }
  if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
    {
      /* rep; movq isn't available in 32-bit code.  */
      error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
      ix86_stringop_alg = no_stringop;
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;
  if (global_options_set.x_ix86_pmode)
    {
      if ((TARGET_LP64 && ix86_pmode == PMODE_SI)
	  || (!TARGET_64BIT && ix86_pmode == PMODE_DI))
	error ("address mode %qs not supported in the %s bit mode",
	       TARGET_64BIT ? "short" : "long",
	       TARGET_64BIT ? "64" : "32");
    }
  else
    ix86_pmode = TARGET_LP64 ? PMODE_DI : PMODE_SI;

  if (!global_options_set.x_ix86_abi)
    ix86_abi = DEFAULT_ABI;
  if (global_options_set.x_ix86_cmodel)
    {
      switch (ix86_cmodel)
	{
	case CM_SMALL:
	case CM_SMALL_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_SMALL_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "small", "32");
	  break;

	case CM_MEDIUM:
	case CM_MEDIUM_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_MEDIUM_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "medium", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "medium");
	  break;

	case CM_LARGE:
	case CM_LARGE_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_LARGE_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "large", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "large");
	  break;

	case CM_32:
	  if (flag_pic)
	    error ("code model %s does not support PIC mode", "32");
	  if (TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "32", "64");
	  break;

	case CM_KERNEL:
	  if (flag_pic)
	    {
	      error ("code model %s does not support PIC mode", "kernel");
	      ix86_cmodel = CM_32;
	    }
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "kernel", "32");
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
	 use of rip-relative addressing.  This eliminates fixups that
	 would otherwise be needed if this object is to be placed in a
	 DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
      else if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
	ix86_cmodel = CM_32;
    }
  if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
    {
      error ("-masm=intel not supported in this configuration");
      ix86_asm_dialect = ASM_ATT;
    }
  if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3271 for (i
= 0; i
< pta_size
; i
++)
3272 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3274 ix86_schedule
= processor_alias_table
[i
].schedule
;
3275 ix86_arch
= processor_alias_table
[i
].processor
;
3276 /* Default cpu tuning to the architecture. */
3277 ix86_tune
= ix86_arch
;
3279 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
	    & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");

	if (processor_alias_table[i].flags & PTA_MMX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
	  ix86_isa_flags |= OPTION_MASK_ISA_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & PTA_RTM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
	  ix86_isa_flags |= OPTION_MASK_ISA_RTM;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;

	break;
      }
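  /* Note that each PTA_* test above only turns an ISA on when the user has
     not set it explicitly: the explicit mask wins either way.  For instance,
     "-march=CPU -mno-sse3" leaves SSE3 disabled even when the selected CPU
     supports it, because OPTION_MASK_ISA_SSE3 is already recorded in
     ix86_isa_flags_explicit by the time this loop runs.  */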
  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
	   ix86_arch_string, prefix, suffix, sw);

  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT)
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		if (ix86_tune_defaulted)
		  {
		    ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	else
	  {
	    /* Adjust tuning when compiling for 32-bit ABI.  */
	    switch (ix86_tune)
	      {
	      case PROCESSOR_GENERIC64:
		ix86_tune = PROCESSOR_GENERIC32;
		ix86_schedule = CPU_PENTIUMPRO;
		break;

	      case PROCESSOR_CORE2_64:
		ix86_tune = PROCESSOR_CORE2_32;
		break;

	      case PROCESSOR_COREI7_64:
		ix86_tune = PROCESSOR_COREI7_32;
		break;

	      default:
		break;
	      }
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }
  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   ix86_tune_string, prefix, suffix, sw);

  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (flag_asynchronous_unwind_tables == 2)
	flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (global_options_set.x_ix86_regparm)
    {
      if (TARGET_64BIT)
	warning (0, "-mregparm is ignored in 64-bit mode");
      if (ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 ix86_regparm, REGPARM_MAX);
	  ix86_regparm = 0;
	}
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip
	= processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip
	= processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;

  /* Provide default for -mbranch-cost= value.  */
  if (!global_options_set.x_ix86_branch_cost)
    ix86_branch_cost = ix86_cost->branch_cost;
  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

      if (TARGET_RTD)
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;

      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use one when the programmer takes care to keep the stack from being
	 destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
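  /* The red zone is the 128-byte area below the stack pointer that the
     x86-64 SysV ABI guarantees will not be clobbered by signal or interrupt
     handlers; the 32-bit ABI makes no such guarantee, hence the
     MASK_NO_RED_ZONE default above.  */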
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2 || TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;

  /* Turn on lzcnt instruction for -mabm.  */
  if (TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
  /* Validate -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (global_options_set.x_ix86_preferred_stack_boundary_arg)
    {
      int min = (TARGET_64BIT ? 4 : 2);
      int max = (TARGET_SEH ? 4 : 12);

      if (ix86_preferred_stack_boundary_arg < min
	  || ix86_preferred_stack_boundary_arg > max)
	{
	  if (min == max)
	    error ("-mpreferred-stack-boundary is not supported "
		   "for this target");
	  else
	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		   ix86_preferred_stack_boundary_arg, min, max);
	}
      else
	ix86_preferred_stack_boundary
	  = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
    }

  /* Set the default value for -mstackrealign.  */
  if (ix86_force_align_arg_pointer == -1)
    ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (global_options_set.x_ix86_incoming_stack_boundary_arg)
    {
      if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
	  || ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }
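  /* Both boundary options take the log2 of the alignment in bytes, so for
     example -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. a 16-byte-aligned stack.  */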
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);

  if (global_options_set.x_ix86_fpmath)
    {
      if (ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  else
    ix86_fpmath = TARGET_FPMATH_DEFAULT;

  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;
  /* Use external vectorized library in vectorizing intrinsics.  */
  if (global_options_set.x_ix86_veclibabi_type)
    switch (ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }
  if ((!USE_IX86_FRAME_POINTER
       || (x86_accumulate_outgoing_args & ix86_tune_mask))
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or %saccumulate-outgoing-args%s for correctness",
		 prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* For sane SSE instruction set generation we need the fcomi instruction.
     It is safe to enable all CMOVE instructions.  Also, the RDRAND intrinsic
     expands to a sequence that includes a conditional move.  */
  if (TARGET_SSE || TARGET_RDRND)
    TARGET_CMOVE = 1;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When a scheduling description is not available, disable the scheduler
     pass so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_cost->simultaneous_prefetches,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
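  /* maybe_set_param_value only fills in params the user has not set, so an
     explicit command-line value such as --param l1-cache-size=64 still takes
     precedence over the per-CPU cost-table values supplied here.  */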
  /* Enable software prefetching at -O3 for CPUs where prefetching is
     helpful.  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    flag_prefetch_loop_arrays = 1;

  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT && !flag_split_stack)
    targetm.expand_builtin_va_start = NULL;
  if (TARGET_64BIT)
    {
      ix86_gen_leave = gen_leave_rex64;
      if (Pmode == DImode)
	{
	  ix86_gen_monitor = gen_sse3_monitor64_di;
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_di;
	}
      else
	{
	  ix86_gen_monitor = gen_sse3_monitor64_si;
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_si;
	}
    }
  else
    {
      ix86_gen_leave = gen_leave;
      ix86_gen_monitor = gen_sse3_monitor;
    }

  if (Pmode == DImode)
    {
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
    }
  else
    {
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
    }
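  /* Both Pmode cases can occur under TARGET_64BIT: Pmode is DImode for the
     regular 64-bit ABI but SImode for the x32 ABI, which is why the _di and
     _si generator variants are selected at run time above rather than by
     TARGET_64BIT alone.  */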
#ifdef USE_IX86_CLD
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT)
    target_flags |= MASK_CLD & ~target_flags_explicit;
#endif

  if (!TARGET_64BIT && flag_pic)
    {
      if (flag_fentry > 0)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      flag_fentry = 0;
    }
  else if (TARGET_SEH)
    {
      if (flag_fentry == 0)
	sorry ("-mno-fentry isn%'t compatible with SEH");
      flag_fentry = 1;
    }
  else if (flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      flag_fentry = 1;
#else
      flag_fentry = 0;
#endif
    }

  if (TARGET_AVX)
    {
      /* When not optimizing for size, enable vzeroupper optimization for
	 TARGET_AVX with -fexpensive-optimizations and split 32-byte
	 AVX unaligned load/store.  */
      if (!optimize_size)
	{
	  if (flag_expensive_optimizations
	      && !(target_flags_explicit & MASK_VZEROUPPER))
	    target_flags |= MASK_VZEROUPPER;
	  if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
	  if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
	  /* Enable 128-bit AVX instruction generation
	     for the auto-vectorizer.  */
	  if (TARGET_AVX128_OPTIMAL
	      && !(target_flags_explicit & MASK_PREFER_AVX128))
	    target_flags |= MASK_PREFER_AVX128;
	}
    }
  else
    {
      /* Disable vzeroupper pass if TARGET_AVX is disabled.  */
      target_flags &= ~MASK_VZEROUPPER;
    }
  if (ix86_recip_name)
    {
      char *p = ASTRDUP (ix86_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = RECIP_MASK_ALL;
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = RECIP_MASK_NONE;
		}
	    }

	  recip_mask_explicit |= mask;
	  if (invert)
	    recip_mask &= ~mask;
	  else
	    recip_mask |= mask;
	}
    }

  if (TARGET_RECIP)
    recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
  else if (target_flags_explicit & MASK_RECIP)
    recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
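  /* A sketch of the syntax accepted above (illustrative): the -mrecip=
     argument is a comma-separated list, each item optionally prefixed with
     '!' to subtract it, so "-mrecip=default,!sqrt" would select the default
     set minus the sqrt approximations.  The valid item names are whatever
     the recip_options[] table defines.  */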
  /* Save the initial options in case the user does function specific
     options.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node ();
}
/* Return TRUE if VAL is passed in register with 256bit AVX modes.  */

static bool
function_pass_avx256_p (const_rtx val)
{
  if (!val)
    return false;

  if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
    return true;

  if (GET_CODE (val) == PARALLEL)
    {
      int i;
      rtx r;

      for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
	{
	  r = XVECEXP (val, 0, i);
	  if (GET_CODE (r) == EXPR_LIST
	      && XEXP (r, 0)
	      && REG_P (XEXP (r, 0))
	      && (GET_MODE (XEXP (r, 0)) == OImode
		  || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
	    return true;
	}
    }

  return false;
}
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  ix86_option_override_internal (true);
}
/* Update register usage after having seen the compiler flags.  */

static void
ix86_conditional_register_usage (void)
{
  int i;
  unsigned int j;

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (fixed_regs[i] > 1)
	fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
      if (call_used_regs[i] > 1)
	call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
    }

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* The 64-bit MS_ABI changes the set of call-used registers.  */
  if (TARGET_64BIT_MS_ABI)
    {
      call_used_regs[SI_REG] = 0;
      call_used_regs[DI_REG] = 0;
      call_used_regs[XMM6_REG] = 0;
      call_used_regs[XMM7_REG] = 0;
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	call_used_regs[i] = 0;
    }

  /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
     other call-clobbered regs for 64-bit.  */
  if (TARGET_64BIT)
    {
      CLEAR_HARD_REG_SET (reg_class_contents[(int) CLOBBERED_REGS]);

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	if (TEST_HARD_REG_BIT (reg_class_contents[(int) GENERAL_REGS], i)
	    && call_used_regs[i])
	  SET_HARD_REG_BIT (reg_class_contents[(int) CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int) MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int) SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int) FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If 32-bit, squash the 64-bit registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	reg_names[i] = "";
    }
}
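/* For example, a plain -m32 compilation ends here with r8-r15 and
   xmm8-xmm15 stripped of their names, so the register allocator never
   considers the REX-only registers at all.  */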
/* Save the current options.  */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->ix86_target_flags_explicit = target_flags_explicit;
  ptr->x_recip_mask_explicit = recip_mask_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options.  */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  target_flags_explicit = ptr->ix86_target_flags_explicit;
  recip_mask_explicit = ptr->x_recip_mask_explicit;

  /* Recreate the arch feature tests if the arch changed.  */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests.  */
  if (old_tune != ix86_tune)
    {
      ix86_tune_mask = 1u << ix86_tune;
      for (i = 0; i < X86_TUNE_LAST; ++i)
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }
}
/* Print the current options.  */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))), take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;
    size_t len;
    enum ix86_opt_type type;
    int opt;
    int mask;
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
    IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("avx2",	OPT_mavx2),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("fma",	OPT_mfma),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),
    IX86_ATTR_ISA ("rtm",	OPT_mrtm),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };

  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    gcc_unreachable ();

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}

      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (&global_options, &global_options_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    target_flags |= mask;
	  else
	    target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (&global_options, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
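/* Illustrative use of the attribute parsed above (not from this file):

     __attribute__ ((target ("sse4.2,arch=core2")))
     int foo (void);

   Each comma-separated item is looked up in attrs[]: "sse4.2" matches an
   IX86_ATTR_ISA entry and "arch=" an IX86_ATTR_STR entry.  */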
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings,
					     &enum_opts_set))
    return error_mark_node;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT && TARGET_SSE)
	{
	  ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      global_options_set.x_ix86_fpmath = orig_fpmath_set;

      /* Free up memory allocated to hold the strings.  */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}
4451 /* Hook to validate attribute((target("string"))). */
4454 ix86_valid_target_attribute_p (tree fndecl
,
4455 tree
ARG_UNUSED (name
),
4457 int ARG_UNUSED (flags
))
4459 struct cl_target_option cur_target
;
4461 tree old_optimize
= build_optimization_node ();
4462 tree new_target
, new_optimize
;
4463 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4465 /* If the function changed the optimization levels as well as setting target
4466 options, start with the optimizations specified. */
4467 if (func_optimize
&& func_optimize
!= old_optimize
)
4468 cl_optimization_restore (&global_options
,
4469 TREE_OPTIMIZATION (func_optimize
));
4471 /* The target attributes may also change some optimization flags, so update
4472 the optimization options if necessary. */
4473 cl_target_option_save (&cur_target
, &global_options
);
4474 new_target
= ix86_valid_target_attribute_tree (args
);
4475 new_optimize
= build_optimization_node ();
4482 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4484 if (old_optimize
!= new_optimize
)
4485 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4488 cl_target_option_restore (&global_options
, &cur_target
);
4490 if (old_optimize
!= new_optimize
)
4491 cl_optimization_restore (&global_options
,
4492 TREE_OPTIMIZATION (old_optimize
));
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* The callee's ISA options should be a subset of the caller's: an SSE4
	 function can inline an SSE2 function, but an SSE2 function can't
	 inline an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;

      else if (caller_opts->tune != callee_opts->tune)
	ret = false;

      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;

      else
	ret = true;
    }

  return ret;
}
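/* So, for instance, a caller compiled with target("sse4.2") may inline a
   callee compiled with target("sse2"), since the callee's ISA flags are a
   subset of the caller's, but not the other way around.  */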
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  target_reinit ();
	}

      else if (old_tree)
	{
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
	  target_reinit ();
	}
    }
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
	ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use the .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
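/* For a 1 MiB object above the section threshold this emits, e.g.:

     .largecomm	buf,1048576,32

   (illustrative; the trailing figure is the alignment in bytes).  */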
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Disable sibcall if we need to generate vzeroupper after
	 callee returns.  */
      if (TARGET_VZEROUPPER
	  && cfun->machine->callee_return_avx256_p
	  && !cfun->machine->caller_return_avx256_p)
	return false;
    }
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
4923 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4924 and "sseregparm" calling convention attributes;
4925 arguments as in struct attribute_spec.handler. */
4928 ix86_handle_cconv_attribute (tree
*node
, tree name
,
4930 int flags ATTRIBUTE_UNUSED
,
4933 if (TREE_CODE (*node
) != FUNCTION_TYPE
4934 && TREE_CODE (*node
) != METHOD_TYPE
4935 && TREE_CODE (*node
) != FIELD_DECL
4936 && TREE_CODE (*node
) != TYPE_DECL
)
4938 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4940 *no_add_attrs
= true;
4944 /* Can combine regparm with all attributes but fastcall, and thiscall. */
4945 if (is_attribute_p ("regparm", name
))
4949 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4951 error ("fastcall and regparm attributes are not compatible");
4954 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4956 error ("regparam and thiscall attributes are not compatible");
4959 cst
= TREE_VALUE (args
);
4960 if (TREE_CODE (cst
) != INTEGER_CST
)
4962 warning (OPT_Wattributes
,
4963 "%qE attribute requires an integer constant argument",
4965 *no_add_attrs
= true;
4967 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
4969 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
4971 *no_add_attrs
= true;
4979 /* Do not warn when emulating the MS ABI. */
4980 if ((TREE_CODE (*node
) != FUNCTION_TYPE
4981 && TREE_CODE (*node
) != METHOD_TYPE
)
4982 || ix86_function_type_abi (*node
) != MS_ABI
)
4983 warning (OPT_Wattributes
, "%qE attribute ignored",
4985 *no_add_attrs
= true;
4989 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4990 if (is_attribute_p ("fastcall", name
))
4992 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4994 error ("fastcall and cdecl attributes are not compatible");
4996 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4998 error ("fastcall and stdcall attributes are not compatible");
5000 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5002 error ("fastcall and regparm attributes are not compatible");
5004 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5006 error ("fastcall and thiscall attributes are not compatible");
5010 /* Can combine stdcall with fastcall (redundant), regparm and
5012 else if (is_attribute_p ("stdcall", name
))
5014 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5016 error ("stdcall and cdecl attributes are not compatible");
5018 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5020 error ("stdcall and fastcall attributes are not compatible");
5022 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5024 error ("stdcall and thiscall attributes are not compatible");
5028 /* Can combine cdecl with regparm and sseregparm. */
5029 else if (is_attribute_p ("cdecl", name
))
5031 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5033 error ("stdcall and cdecl attributes are not compatible");
5035 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5037 error ("fastcall and cdecl attributes are not compatible");
5039 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5041 error ("cdecl and thiscall attributes are not compatible");
5044 else if (is_attribute_p ("thiscall", name
))
5046 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5047 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5049 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5051 error ("stdcall and thiscall attributes are not compatible");
5053 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5055 error ("fastcall and thiscall attributes are not compatible");
5057 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5059 error ("cdecl and thiscall attributes are not compatible");
5063 /* Can combine sseregparm with all attributes. */
/* The transactional memory builtins are implicitly regparm or fastcall
   depending on the ABI.  Override the generic do-nothing attribute that
   these builtins were declared with, and replace it with one of the two
   attributes that we expect elsewhere.  */

static tree
ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree alt;

  /* In no case do we want to add the placeholder attribute.  */
  *no_add_attrs = true;

  /* The 64-bit ABI is unchanged for transactional memory.  */
  if (TARGET_64BIT)
    return NULL_TREE;

  /* ??? Is there a better way to validate 32-bit windows?  We have
     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
  if (CHECK_STACK_LIMIT > 0)
    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
  else
    {
      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
    }
  decl_attributes (node, alt, flags);

  return NULL_TREE;
}
/* This function determines from TYPE the calling convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* regparm isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
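/* Note the -mrtd interaction above: with TARGET_RTD, any non-stdarg function
   that carries no explicit convention attribute is treated as stdcall-like
   (IX86_CALLCVT_STDCALL), so callees pop their own arguments module-wide.  */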
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
5176 /* Return the regparm value for a function with the indicated TYPE and DECL.
5177 DECL may be NULL when calling function indirectly
5178 or considering a libcall. */
5181 ix86_function_regparm (const_tree type
, const_tree decl
)
5188 return (ix86_function_type_abi (type
) == SYSV_ABI
5189 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5190 ccvt
= ix86_get_callcvt (type
);
5191 regparm
= ix86_regparm
;
5193 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
5195 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5198 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5202 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5204 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5207 /* Use register calling convention for local functions when possible. */
5209 && TREE_CODE (decl
) == FUNCTION_DECL
5211 && !(profile_flag
&& !flag_fentry
))
5213 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5214 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5215 if (i
&& i
->local
&& i
->can_change_signature
)
5217 int local_regparm
, globals
= 0, regno
;
5219 /* Make sure no regparm register is taken by a
5220 fixed register variable. */
5221 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5222 if (fixed_regs
[local_regparm
])
5225 /* We don't want to use regparm(3) for nested functions as
5226 these use a static chain pointer in the third argument. */
5227 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5230 /* In 32-bit mode save a register for the split stack. */
5231 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5234 /* Each fixed register usage increases register pressure,
5235 so less registers should be used for argument passing.
5236 This functionality can be overriden by an explicit
5238 for (regno
= 0; regno
<= DI_REG
; regno
++)
5239 if (fixed_regs
[regno
])
5243 = globals
< local_regparm
? local_regparm
- globals
: 0;
5245 if (local_regparm
> regparm
)
5246 regparm
= local_regparm
;
5253 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5254 DFmode (2) arguments in SSE registers for a function with the
5255 indicated TYPE and DECL. DECL may be NULL when calling function
5256 indirectly or considering a libcall. Otherwise return 0. */
5259 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5261 gcc_assert (!TARGET_64BIT
);
5263 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5264 by the sseregparm attribute. */
5265 if (TARGET_SSEREGPARM
5266 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5273 error ("calling %qD with attribute sseregparm without "
5274 "SSE/SSE2 enabled", decl
);
5276 error ("calling %qT with attribute sseregparm without "
5277 "SSE/SSE2 enabled", type
);
5285 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5286 (and DFmode for SSE2) arguments in SSE registers. */
5287 if (decl
&& TARGET_SSE_MATH
&& optimize
5288 && !(profile_flag
&& !flag_fentry
))
5290 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5291 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5292 if (i
&& i
->local
&& i
->can_change_signature
)
5293 return TARGET_SSE2
? 2 : 1;
5299 /* Return true if EAX is live at the start of the function. Used by
5300 ix86_expand_prologue to determine if we need special help before
5301 calling allocate_stack_worker. */
5304 ix86_eax_live_at_start_p (void)
5306 /* Cheat. Don't bother working forward from ix86_function_regparm
5307 to the function type to whether an actual argument is located in
5308 eax. Instead just look at cfg info, which is still close enough
5309 to correct at this point. This gives false positives for broken
5310 functions that might use uninitialized data that happens to be
5311 allocated in eax, but who cares? */
5312 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
5316 ix86_keep_aggregate_return_pointer (tree fntype
)
5322 attr
= lookup_attribute ("callee_pop_aggregate_return",
5323 TYPE_ATTRIBUTES (fntype
));
5325 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5327 /* For 32-bit MS-ABI the default is to keep aggregate
5329 if (ix86_function_type_abi (fntype
) == MS_ABI
)
5332 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5335 /* Value is the number of bytes of arguments automatically
5336 popped when returning from a subroutine call.
5337 FUNDECL is the declaration node of the function (as a tree),
5338 FUNTYPE is the data type of the function (as a tree),
5339 or for a library call it is an identifier node for the subroutine name.
5340 SIZE is the number of bytes of arguments passed on the stack.
5342 On the 80386, the RTD insn may be used to pop them if the number
5343 of args is fixed, but if the number is variable then the caller
5344 must pop them all. RTD can't be used for library calls now
5345 because the library is compiled with the Unix compiler.
5346 Use of RTD is a selectable option, since it is incompatible with
5347 standard Unix calling sequences. If the option is not selected,
5348 the caller must always pop the args.
5350 The attribute stdcall is equivalent to RTD on a per module basis. */
5353 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5357 /* None of the 64-bit ABIs pop arguments. */
5361 ccvt
= ix86_get_callcvt (funtype
);
5363 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
5364 | IX86_CALLCVT_THISCALL
)) != 0
5365 && ! stdarg_p (funtype
))
5368 /* Lose any fake structure return argument if it is passed on the stack. */
5369 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5370 && !ix86_keep_aggregate_return_pointer (funtype
))
5372 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5374 return GET_MODE_SIZE (Pmode
);
5380 /* Argument support functions. */
5382 /* Return true when register may be used to pass function parameters. */
5384 ix86_function_arg_regno_p (int regno
)
5387 const int *parm_regs
;
5392 return (regno
< REGPARM_MAX
5393 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5395 return (regno
< REGPARM_MAX
5396 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5397 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5398 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5399 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5404 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
5409 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5410 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5414 /* TODO: The function should depend on current function ABI but
5415 builtins.c would need updating then. Therefore we use the
5418 /* RAX is used as hidden argument to va_arg functions. */
5419 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5422 if (ix86_abi
== MS_ABI
)
5423 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5425 parm_regs
= x86_64_int_parameter_registers
;
5426 for (i
= 0; i
< (ix86_abi
== MS_ABI
5427 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5428 if (regno
== parm_regs
[i
])
/* Return if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}
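/* Illustrative note (not from the original source): when SSE is
   disabled in 32-bit mode, layout_type still gives a user vector such
   as

       typedef int v4si __attribute__ ((vector_size (16)));

   the 128-bit integer mode TImode.  The VECTOR_TYPE test above keeps
   such types out of the "must pass in stack" answer so the regular
   vector-argument handling (and its ABI warnings) still applies, while
   any other 32-bit TImode value is sent to the stack.  */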
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI in use.  */

int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call abi used.  */

enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}
static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	{
	  error_at (DECL_SOURCE_LOCATION (fn),
		    "ms_hook_prologue is not compatible with nested function");
	  return false;
	}
      return true;
    }
  return false;
}

/* Returns value SYSV_ABI, MS_ABI dependent on fndecl, specifying the
   call abi used.  */

static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (fndecl == NULL)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call abi used.  */

enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
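/* Background note (assumption, based on the documented Windows
   hot-patch convention rather than anything in this file): the 0xCC
   filler emitted above leaves room for a long jump to a replacement
   function, and the "0x8b 0xff" (mov %edi, %edi) marker is a two-byte
   no-op at the entry point that can later be atomically rewritten into
   a short backward jump into that filler, without stopping threads
   that are about to enter the function.  */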
extern void init_regs (void);

/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */

void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* 64-bit MS and SYSV ABI have different set of call used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function
   context since this is needed only during RTL expansion.  */

static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT
      && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_local_info *i;
  tree fnret_type;

  memset (cum, 0, sizeof (*cum));

  /* Initialize for the current callee.  */
  if (caller)
    {
      cfun->machine->callee_pass_avx256_p = false;
      cfun->machine->callee_return_avx256_p = false;
    }

  if (fndecl)
    {
      i = cgraph_local_info (fndecl);
      cum->call_abi = ix86_function_abi (fndecl);
      fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
    }
  else
    {
      i = NULL;
      cum->call_abi = ix86_function_type_abi (fntype);
      if (fntype)
	fnret_type = TREE_TYPE (fntype);
      else
	fnret_type = NULL;
    }

  if (TARGET_VZEROUPPER && fnret_type)
    {
      rtx fnret_value = ix86_function_value (fnret_type, fntype,
					     false);
      if (function_pass_avx256_p (fnret_value))
	{
	  /* The return value of this function uses 256bit AVX modes.  */
	  if (caller)
	    cfun->machine->callee_return_avx256_p = true;
	  else
	    cfun->machine->caller_return_avx256_p = true;
	}
    }

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */

  if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
	   "or subtarget optimization implying it");
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
		    ? X86_64_REGPARM_MAX
		    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	{
	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
			    ? X86_64_SSE_REGPARM_MAX
			    : X86_64_MS_SSE_REGPARM_MAX);
	}
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive,
     helping K&R code.
     FIXME: once the typesystem is fixed, we won't need this code anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx = 0;
	  cum->warn_sse = 0;
	  cum->warn_mmx = 0;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }
}
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with vector types > 16 bytes.  In this
   case, we return the original mode and warn about the ABI change if
   CUM isn't NULL.  */

static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		if (size == 32 && !TARGET_AVX)
		  {
		    static bool warnedavx;

		    if (cum && cum->warn_avx && !warnedavx)
		      {
			warnedavx = true;
			warning (0, "AVX vector argument without AVX "
				 "enabled changes the ABI");
		      }
		    return TYPE_MODE (type);
		  }
		else
		  return mode;
	      }

	  gcc_unreachable ();
	}
    }

  return mode;
}
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
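/* Worked example (not from the original source): for

       struct s { int i; float f; };

   both fields share one eightbyte; the int classifies as INTEGERSI and
   the float as SSESF, and rule #4 above merges them to INTEGERSI, so
   the whole struct travels in a single general register.  */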
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records; it gives the
   offset of the piece being classified, in bits modulo 256, to avoid
   overflow cases.

   See the x86-64 PS ABI for details.  */
5866 classify_argument (enum machine_mode mode
, const_tree type
,
5867 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
5869 HOST_WIDE_INT bytes
=
5870 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
5872 = (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5874 /* Variable sized entities are always passed/returned in memory. */
5878 if (mode
!= VOIDmode
5879 && targetm
.calls
.must_pass_in_stack (mode
, type
))
5882 if (type
&& AGGREGATE_TYPE_P (type
))
5886 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
5888 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5892 for (i
= 0; i
< words
; i
++)
5893 classes
[i
] = X86_64_NO_CLASS
;
      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal the memory class, so handle it as a special case.  */
5899 classes
[0] = X86_64_NO_CLASS
;
5903 /* Classify each field of record and merge classes. */
5904 switch (TREE_CODE (type
))
5907 /* And now merge the fields of structure. */
5908 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5910 if (TREE_CODE (field
) == FIELD_DECL
)
5914 if (TREE_TYPE (field
) == error_mark_node
)
5917 /* Bitfields are always classified as integer. Handle them
5918 early, since later code would consider them to be
5919 misaligned integers. */
5920 if (DECL_BIT_FIELD (field
))
5922 for (i
= (int_bit_position (field
)
5923 + (bit_offset
% 64)) / 8 / 8;
5924 i
< ((int_bit_position (field
) + (bit_offset
% 64))
5925 + tree_low_cst (DECL_SIZE (field
), 0)
5928 merge_classes (X86_64_INTEGER_CLASS
,
5935 type
= TREE_TYPE (field
);
5937 /* Flexible array member is ignored. */
5938 if (TYPE_MODE (type
) == BLKmode
5939 && TREE_CODE (type
) == ARRAY_TYPE
5940 && TYPE_SIZE (type
) == NULL_TREE
5941 && TYPE_DOMAIN (type
) != NULL_TREE
5942 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
5947 if (!warned
&& warn_psabi
)
5950 inform (input_location
,
5951 "the ABI of passing struct with"
5952 " a flexible array member has"
5953 " changed in GCC 4.4");
5957 num
= classify_argument (TYPE_MODE (type
), type
,
5959 (int_bit_position (field
)
5960 + bit_offset
) % 256);
5963 pos
= (int_bit_position (field
)
5964 + (bit_offset
% 64)) / 8 / 8;
5965 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
5967 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
5974 /* Arrays are handled as small records. */
5977 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
5978 TREE_TYPE (type
), subclasses
, bit_offset
);
5982 /* The partial classes are now full classes. */
5983 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
5984 subclasses
[0] = X86_64_SSE_CLASS
;
5985 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
5986 && !((bit_offset
% 64) == 0 && bytes
== 4))
5987 subclasses
[0] = X86_64_INTEGER_CLASS
;
5989 for (i
= 0; i
< words
; i
++)
5990 classes
[i
] = subclasses
[i
% num
];
5995 case QUAL_UNION_TYPE
:
5996 /* Unions are similar to RECORD_TYPE but offset is always 0.
5998 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6000 if (TREE_CODE (field
) == FIELD_DECL
)
6004 if (TREE_TYPE (field
) == error_mark_node
)
6007 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6008 TREE_TYPE (field
), subclasses
,
6012 for (i
= 0; i
< num
; i
++)
6013 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6024 /* When size > 16 bytes, if the first one isn't
6025 X86_64_SSE_CLASS or any other ones aren't
6026 X86_64_SSEUP_CLASS, everything should be passed in
6028 if (classes
[0] != X86_64_SSE_CLASS
)
6031 for (i
= 1; i
< words
; i
++)
6032 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6036 /* Final merger cleanup. */
6037 for (i
= 0; i
< words
; i
++)
6039 /* If one class is MEMORY, everything should be passed in
6041 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6044 /* The X86_64_SSEUP_CLASS should be always preceded by
6045 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6046 if (classes
[i
] == X86_64_SSEUP_CLASS
6047 && classes
[i
- 1] != X86_64_SSE_CLASS
6048 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6050 /* The first one should never be X86_64_SSEUP_CLASS. */
6051 gcc_assert (i
!= 0);
6052 classes
[i
] = X86_64_SSE_CLASS
;
6055 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6056 everything should be passed in memory. */
6057 if (classes
[i
] == X86_64_X87UP_CLASS
6058 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6062 /* The first one should never be X86_64_X87UP_CLASS. */
6063 gcc_assert (i
!= 0);
6064 if (!warned
&& warn_psabi
)
6067 inform (input_location
,
6068 "the ABI of passing union with long double"
6069 " has changed in GCC 4.4");
6077 /* Compute alignment needed. We align all types to natural boundaries with
6078 exception of XFmode that is aligned to 64bits. */
6079 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6081 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6084 mode_alignment
= 128;
6085 else if (mode
== XCmode
)
6086 mode_alignment
= 256;
6087 if (COMPLEX_MODE_P (mode
))
6088 mode_alignment
/= 2;
6089 /* Misaligned fields are always returned in memory. */
6090 if (bit_offset
% mode_alignment
)
6094 /* for V1xx modes, just use the base mode */
6095 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6096 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6097 mode
= GET_MODE_INNER (mode
);
6099 /* Classification of atomic types. */
6104 classes
[0] = X86_64_SSE_CLASS
;
6107 classes
[0] = X86_64_SSE_CLASS
;
6108 classes
[1] = X86_64_SSEUP_CLASS
;
6118 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6122 classes
[0] = X86_64_INTEGERSI_CLASS
;
6125 else if (size
<= 64)
6127 classes
[0] = X86_64_INTEGER_CLASS
;
6130 else if (size
<= 64+32)
6132 classes
[0] = X86_64_INTEGER_CLASS
;
6133 classes
[1] = X86_64_INTEGERSI_CLASS
;
6136 else if (size
<= 64+64)
6138 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6146 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6150 /* OImode shouldn't be used directly. */
6155 if (!(bit_offset
% 64))
6156 classes
[0] = X86_64_SSESF_CLASS
;
6158 classes
[0] = X86_64_SSE_CLASS
;
6161 classes
[0] = X86_64_SSEDF_CLASS
;
6164 classes
[0] = X86_64_X87_CLASS
;
6165 classes
[1] = X86_64_X87UP_CLASS
;
6168 classes
[0] = X86_64_SSE_CLASS
;
6169 classes
[1] = X86_64_SSEUP_CLASS
;
6172 classes
[0] = X86_64_SSE_CLASS
;
6173 if (!(bit_offset
% 64))
6179 if (!warned
&& warn_psabi
)
6182 inform (input_location
,
6183 "the ABI of passing structure with complex float"
6184 " member has changed in GCC 4.4");
6186 classes
[1] = X86_64_SSESF_CLASS
;
6190 classes
[0] = X86_64_SSEDF_CLASS
;
6191 classes
[1] = X86_64_SSEDF_CLASS
;
6194 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
      /* This mode is larger than 16 bytes.  */
6205 classes
[0] = X86_64_SSE_CLASS
;
6206 classes
[1] = X86_64_SSEUP_CLASS
;
6207 classes
[2] = X86_64_SSEUP_CLASS
;
6208 classes
[3] = X86_64_SSEUP_CLASS
;
6216 classes
[0] = X86_64_SSE_CLASS
;
6217 classes
[1] = X86_64_SSEUP_CLASS
;
6225 classes
[0] = X86_64_SSE_CLASS
;
6231 gcc_assert (VECTOR_MODE_P (mode
));
6236 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6238 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6239 classes
[0] = X86_64_INTEGERSI_CLASS
;
6241 classes
[0] = X86_64_INTEGER_CLASS
;
6242 classes
[1] = X86_64_INTEGER_CLASS
;
6243 return 1 + (bytes
> 8);
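/* Worked example (not from the original source): classifying

       struct s { long l; double d; };

   produces two eightbytes, classes[0] = X86_64_INTEGER_CLASS and
   classes[1] = X86_64_SSE_CLASS, so the first SysV argument of this
   type is split between %rdi and %xmm0.  */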
/* Examine the argument and return the number of registers required in
   each class.  Return 0 iff the parameter should be passed in memory.  */

static int
examine_argument (enum machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}
6288 /* Construct container for the argument used by GCC interface. See
6289 FUNCTION_ARG for the detailed description. */
6292 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6293 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6294 const int *intreg
, int sse_regno
)
6296 /* The following variables hold the static issued_error state. */
6297 static bool issued_sse_arg_error
;
6298 static bool issued_sse_ret_error
;
6299 static bool issued_x87_ret_error
;
6301 enum machine_mode tmpmode
;
6303 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6304 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6308 int needed_sseregs
, needed_intregs
;
6309 rtx exp
[MAX_CLASSES
];
6312 n
= classify_argument (mode
, type
, regclass
, 0);
6315 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6318 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6321 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6322 some less clueful developer tries to use floating-point anyway. */
6323 if (needed_sseregs
&& !TARGET_SSE
)
6327 if (!issued_sse_ret_error
)
6329 error ("SSE register return with SSE disabled");
6330 issued_sse_ret_error
= true;
6333 else if (!issued_sse_arg_error
)
6335 error ("SSE register argument with SSE disabled");
6336 issued_sse_arg_error
= true;
6341 /* Likewise, error if the ABI requires us to return values in the
6342 x87 registers and the user specified -mno-80387. */
6343 if (!TARGET_80387
&& in_return
)
6344 for (i
= 0; i
< n
; i
++)
6345 if (regclass
[i
] == X86_64_X87_CLASS
6346 || regclass
[i
] == X86_64_X87UP_CLASS
6347 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6349 if (!issued_x87_ret_error
)
6351 error ("x87 register return with x87 disabled");
6352 issued_x87_ret_error
= true;
6357 /* First construct simple cases. Avoid SCmode, since we want to use
6358 single register to pass this type. */
6359 if (n
== 1 && mode
!= SCmode
)
6360 switch (regclass
[0])
6362 case X86_64_INTEGER_CLASS
:
6363 case X86_64_INTEGERSI_CLASS
:
6364 return gen_rtx_REG (mode
, intreg
[0]);
6365 case X86_64_SSE_CLASS
:
6366 case X86_64_SSESF_CLASS
:
6367 case X86_64_SSEDF_CLASS
:
6368 if (mode
!= BLKmode
)
6369 return gen_reg_or_parallel (mode
, orig_mode
,
6370 SSE_REGNO (sse_regno
));
6372 case X86_64_X87_CLASS
:
6373 case X86_64_COMPLEX_X87_CLASS
:
6374 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6375 case X86_64_NO_CLASS
:
6376 /* Zero sized array, struct or class. */
6382 && regclass
[0] == X86_64_SSE_CLASS
6383 && regclass
[1] == X86_64_SSEUP_CLASS
6385 return gen_reg_or_parallel (mode
, orig_mode
,
6386 SSE_REGNO (sse_regno
));
6388 && regclass
[0] == X86_64_SSE_CLASS
6389 && regclass
[1] == X86_64_SSEUP_CLASS
6390 && regclass
[2] == X86_64_SSEUP_CLASS
6391 && regclass
[3] == X86_64_SSEUP_CLASS
6393 return gen_reg_or_parallel (mode
, orig_mode
,
6394 SSE_REGNO (sse_regno
));
6396 && regclass
[0] == X86_64_X87_CLASS
6397 && regclass
[1] == X86_64_X87UP_CLASS
)
6398 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6401 && regclass
[0] == X86_64_INTEGER_CLASS
6402 && regclass
[1] == X86_64_INTEGER_CLASS
6403 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6404 && intreg
[0] + 1 == intreg
[1])
6405 return gen_rtx_REG (mode
, intreg
[0]);
6407 /* Otherwise figure out the entries of the PARALLEL. */
6408 for (i
= 0; i
< n
; i
++)
6412 switch (regclass
[i
])
6414 case X86_64_NO_CLASS
:
6416 case X86_64_INTEGER_CLASS
:
6417 case X86_64_INTEGERSI_CLASS
:
6418 /* Merge TImodes on aligned occasions here too. */
6419 if (i
* 8 + 8 > bytes
)
6421 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6422 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
	  /* We've requested 24 bytes we don't have a mode for.
	     Use DImode.  */
6428 if (tmpmode
== BLKmode
)
6431 = gen_rtx_EXPR_LIST (VOIDmode
,
6432 gen_rtx_REG (tmpmode
, *intreg
),
6436 case X86_64_SSESF_CLASS
:
6438 = gen_rtx_EXPR_LIST (VOIDmode
,
6439 gen_rtx_REG (SFmode
,
6440 SSE_REGNO (sse_regno
)),
6444 case X86_64_SSEDF_CLASS
:
6446 = gen_rtx_EXPR_LIST (VOIDmode
,
6447 gen_rtx_REG (DFmode
,
6448 SSE_REGNO (sse_regno
)),
6452 case X86_64_SSE_CLASS
:
6460 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6470 && regclass
[1] == X86_64_SSEUP_CLASS
6471 && regclass
[2] == X86_64_SSEUP_CLASS
6472 && regclass
[3] == X86_64_SSEUP_CLASS
);
6480 = gen_rtx_EXPR_LIST (VOIDmode
,
6481 gen_rtx_REG (tmpmode
,
6482 SSE_REGNO (sse_regno
)),
6491 /* Empty aligned struct, union or class. */
6495 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6496 for (i
= 0; i
< nexps
; i
++)
6497 XVECEXP (ret
, 0, i
) = exp
[i
];
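/* Illustrative sketch (not from the original source): for a struct
   classified as { INTEGER, SSE }, the container built here is roughly

       (parallel [(expr_list (reg:DI di) (const_int 0))
		  (expr_list (reg:DF xmm0) (const_int 8))])

   one EXPR_LIST per eightbyte, pairing a hard register with the byte
   offset of the piece it carries.  */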
6501 /* Update the data in CUM to advance over an argument of mode MODE
6502 and data type TYPE. (TYPE is null for libcalls where that information
6503 may not be available.) */
6506 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6507 const_tree type
, HOST_WIDE_INT bytes
,
6508 HOST_WIDE_INT words
)
6524 cum
->words
+= words
;
6525 cum
->nregs
-= words
;
6526 cum
->regno
+= words
;
6528 if (cum
->nregs
<= 0)
6536 /* OImode shouldn't be used directly. */
6540 if (cum
->float_in_sse
< 2)
6543 if (cum
->float_in_sse
< 1)
6560 if (!type
|| !AGGREGATE_TYPE_P (type
))
6562 cum
->sse_words
+= words
;
6563 cum
->sse_nregs
-= 1;
6564 cum
->sse_regno
+= 1;
6565 if (cum
->sse_nregs
<= 0)
6579 if (!type
|| !AGGREGATE_TYPE_P (type
))
6581 cum
->mmx_words
+= words
;
6582 cum
->mmx_nregs
-= 1;
6583 cum
->mmx_regno
+= 1;
6584 if (cum
->mmx_nregs
<= 0)
6595 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6596 const_tree type
, HOST_WIDE_INT words
, bool named
)
6598 int int_nregs
, sse_nregs
;
6600 /* Unnamed 256bit vector mode parameters are passed on stack. */
6601 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6604 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6605 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6607 cum
->nregs
-= int_nregs
;
6608 cum
->sse_nregs
-= sse_nregs
;
6609 cum
->regno
+= int_nregs
;
6610 cum
->sse_regno
+= sse_nregs
;
6614 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6615 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6616 cum
->words
+= words
;
static void
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
    }
}
/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
			   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    function_arg_advance_ms_64 (cum, bytes, words);
  else if (TARGET_64BIT)
    function_arg_advance_64 (cum, mode, type, words, named);
  else
    function_arg_advance_32 (cum, mode, type, bytes, words);
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */
6677 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6678 enum machine_mode orig_mode
, const_tree type
,
6679 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
6681 static bool warnedsse
, warnedmmx
;
6683 /* Avoid the AL settings for the Unix64 ABI. */
6684 if (mode
== VOIDmode
)
6700 if (words
<= cum
->nregs
)
6702 int regno
= cum
->regno
;
6704 /* Fastcall allocates the first two DWORD (SImode) or
6705 smaller arguments to ECX and EDX if it isn't an
6711 || (type
&& AGGREGATE_TYPE_P (type
)))
6714 /* ECX not EAX is the first allocated register. */
6715 if (regno
== AX_REG
)
6718 return gen_rtx_REG (mode
, regno
);
6723 if (cum
->float_in_sse
< 2)
6726 if (cum
->float_in_sse
< 1)
6730 /* In 32bit, we pass TImode in xmm registers. */
6737 if (!type
|| !AGGREGATE_TYPE_P (type
))
6739 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
6742 warning (0, "SSE vector argument without SSE enabled "
6746 return gen_reg_or_parallel (mode
, orig_mode
,
6747 cum
->sse_regno
+ FIRST_SSE_REG
);
6752 /* OImode shouldn't be used directly. */
6761 if (!type
|| !AGGREGATE_TYPE_P (type
))
6764 return gen_reg_or_parallel (mode
, orig_mode
,
6765 cum
->sse_regno
+ FIRST_SSE_REG
);
6775 if (!type
|| !AGGREGATE_TYPE_P (type
))
6777 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
6780 warning (0, "MMX vector argument without MMX enabled "
6784 return gen_reg_or_parallel (mode
, orig_mode
,
6785 cum
->mmx_regno
+ FIRST_MMX_REG
);
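/* Illustrative example (not from the original source): for

       void __attribute__ ((fastcall)) g (int a, int b, int c);

   the path above hands 'a' to %ecx and 'b' to %edx (note the ECX-first
   rotation applied when regno == AX_REG), while 'c' overflows to the
   stack; aggregates and wider-than-SImode values are excluded from the
   fastcall registers by the test above.  */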
6794 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6795 enum machine_mode orig_mode
, const_tree type
, bool named
)
6797 /* Handle a hidden AL argument containing number of registers
6798 for varargs x86-64 functions. */
6799 if (mode
== VOIDmode
)
6800 return GEN_INT (cum
->maybe_vaarg
6801 ? (cum
->sse_nregs
< 0
6802 ? X86_64_SSE_REGPARM_MAX
6817 /* Unnamed 256bit vector mode parameters are passed on stack. */
6823 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
6825 &x86_64_int_parameter_registers
[cum
->regno
],
6830 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6831 enum machine_mode orig_mode
, bool named
,
6832 HOST_WIDE_INT bytes
)
6836 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6837 We use value of -2 to specify that current function call is MSABI. */
6838 if (mode
== VOIDmode
)
6839 return GEN_INT (-2);
6841 /* If we've run out of registers, it goes on the stack. */
6842 if (cum
->nregs
== 0)
6845 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
6847 /* Only floating point modes are passed in anything but integer regs. */
6848 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
6851 regno
= cum
->regno
+ FIRST_SSE_REG
;
6856 /* Unnamed floating parameters are passed in both the
6857 SSE and integer registers. */
6858 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
6859 t2
= gen_rtx_REG (mode
, regno
);
6860 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
6861 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
6862 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
6865 /* Handle aggregated types passed in register. */
6866 if (orig_mode
== BLKmode
)
6868 if (bytes
> 0 && bytes
<= 8)
6869 mode
= (bytes
> 4 ? DImode
: SImode
);
6870 if (mode
== BLKmode
)
6874 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   MODE is the argument's machine mode.  TYPE is the data type of the
   argument.  It is null for libcalls where that information may not be
   available.  CUM gives information about the preceding args and about
   the function being called.  NAMED is nonzero if this argument is a
   named parameter (otherwise it is an extra parameter matching an
   ellipsis).  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
		   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  enum machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;
  rtx arg;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type, cum);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
  else if (TARGET_64BIT)
    arg = function_arg_64 (cum, mode, omode, type, named);
  else
    arg = function_arg_32 (cum, mode, omode, type, bytes, words);

  if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
    {
      /* This argument uses 256bit AVX modes.  */
      if (cum->caller)
	cfun->machine->callee_pass_avx256_p = true;
      else
	cfun->machine->caller_pass_avx256_p = true;
    }

  return arg;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* See Windows x64 Software Convention.  */
  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    {
      int msize = (int) GET_MODE_SIZE (mode);
      if (type)
	{
	  /* Arrays are passed by reference.  */
	  if (TREE_CODE (type) == ARRAY_TYPE)
	    return true;

	  if (AGGREGATE_TYPE_P (type))
	    {
	      /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		 are passed by reference.  */
	      msize = int_size_in_bytes (type);
	    }
	}

      /* __m128 is passed by reference.  */
      switch (msize)
	{
	case 1: case 2: case 4: case 8:
	  break;
	default:
	  return true;
	}
    }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
}
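/* Illustrative example (not from the original source): under the
   Windows x64 rules above,

       struct s3 { char c[3]; };    -- size 3  -> passed by reference
       struct s8 { long long x; }; -- size 8  -> passed by value
       int a[2];                   -- array   -> passed by reference

   only aggregates of exactly 1, 2, 4 or 8 bytes travel by value;
   everything else is copied to caller-owned memory and its address is
   passed instead.  */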
6971 /* Return true when TYPE should be 128bit aligned for 32bit argument
6972 passing ABI. XXX: This function is obsolete and is only used for
6973 checking psABI compatibility with previous versions of GCC. */
6976 ix86_compat_aligned_value_p (const_tree type
)
6978 enum machine_mode mode
= TYPE_MODE (type
);
6979 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
6983 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
6985 if (TYPE_ALIGN (type
) < 128)
6988 if (AGGREGATE_TYPE_P (type
))
6990 /* Walk the aggregates recursively. */
6991 switch (TREE_CODE (type
))
6995 case QUAL_UNION_TYPE
:
6999 /* Walk all the structure fields. */
7000 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7002 if (TREE_CODE (field
) == FIELD_DECL
7003 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7010 /* Just for use if some languages passes arrays by value. */
7011 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7022 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7023 XXX: This function is obsolete and is only used for checking psABI
7024 compatibility with previous versions of GCC. */
7027 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7028 const_tree type
, unsigned int align
)
7030 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7031 natural boundaries. */
7032 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7034 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7035 make an exception for SSE modes since these require 128bit
7038 The handling here differs from field_alignment. ICC aligns MMX
7039 arguments to 4 byte boundaries, while structure fields are aligned
7040 to 8 byte boundaries. */
7043 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7044 align
= PARM_BOUNDARY
;
7048 if (!ix86_compat_aligned_value_p (type
))
7049 align
= PARM_BOUNDARY
;
7052 if (align
> BIGGEST_ALIGNMENT
)
7053 align
= BIGGEST_ALIGNMENT
;
7057 /* Return true when TYPE should be 128bit aligned for 32bit argument
7061 ix86_contains_aligned_value_p (const_tree type
)
7063 enum machine_mode mode
= TYPE_MODE (type
);
7065 if (mode
== XFmode
|| mode
== XCmode
)
7068 if (TYPE_ALIGN (type
) < 128)
7071 if (AGGREGATE_TYPE_P (type
))
7073 /* Walk the aggregates recursively. */
7074 switch (TREE_CODE (type
))
7078 case QUAL_UNION_TYPE
:
7082 /* Walk all the structure fields. */
7083 for (field
= TYPE_FIELDS (type
);
7085 field
= DECL_CHAIN (field
))
7087 if (TREE_CODE (field
) == FIELD_DECL
7088 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7095 /* Just for use if some languages passes arrays by value. */
7096 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7105 return TYPE_ALIGN (type
) >= 128;
7110 /* Gives the alignment boundary, in bits, of an argument with the
7111 specified mode and type. */
7114 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7119 /* Since the main variant type is used for call, we convert it to
7120 the main variant type. */
7121 type
= TYPE_MAIN_VARIANT (type
);
7122 align
= TYPE_ALIGN (type
);
7125 align
= GET_MODE_ALIGNMENT (mode
);
7126 if (align
< PARM_BOUNDARY
)
7127 align
= PARM_BOUNDARY
;
7131 unsigned int saved_align
= align
;
7135 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7138 if (mode
== XFmode
|| mode
== XCmode
)
7139 align
= PARM_BOUNDARY
;
7141 else if (!ix86_contains_aligned_value_p (type
))
7142 align
= PARM_BOUNDARY
;
7145 align
= PARM_BOUNDARY
;
7150 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7154 inform (input_location
,
7155 "The ABI for passing parameters with %d-byte"
7156 " alignment has changed in GCC 4.6",
7157 align
/ BITS_PER_UNIT
);
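/* Illustrative example (not from the original source): in 32-bit mode
   with SSE enabled, an argument of type

       typedef float v4sf __attribute__ ((vector_size (16)));

   answers 128 here, while a plain double stays at PARM_BOUNDARY
   (32 bits).  The inform call above fires when this answer differs
   from what ix86_compat_function_arg_boundary would have given before
   the GCC 4.6 ABI change.  */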
7164 /* Return true if N is a possible register number of function value. */
7167 ix86_function_value_regno_p (const unsigned int regno
)
7174 case FIRST_FLOAT_REG
:
7175 /* TODO: The function should depend on current function ABI but
7176 builtins.c would need updating then. Therefore we use the
7178 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7180 return TARGET_FLOAT_RETURNS_IN_80387
;
7186 if (TARGET_MACHO
|| TARGET_64BIT
)
7194 /* Define how to find the value returned by a function.
7195 VALTYPE is the data type of the value (as a tree).
7196 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7197 otherwise, FUNC is 0. */
7200 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7201 const_tree fntype
, const_tree fn
)
7205 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7206 we normally prevent this case when mmx is not available. However
7207 some ABIs may require the result to be returned like DImode. */
7208 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7209 regno
= FIRST_MMX_REG
;
7211 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7212 we prevent this case when sse is not available. However some ABIs
7213 may require the result to be returned like integer TImode. */
7214 else if (mode
== TImode
7215 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7216 regno
= FIRST_SSE_REG
;
7218 /* 32-byte vector modes in %ymm0. */
7219 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7220 regno
= FIRST_SSE_REG
;
7222 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7223 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7224 regno
= FIRST_FLOAT_REG
;
7226 /* Most things go in %eax. */
7229 /* Override FP return register with %xmm0 for local functions when
7230 SSE math is enabled or for functions with sseregparm attribute. */
7231 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7233 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7234 if ((sse_level
>= 1 && mode
== SFmode
)
7235 || (sse_level
== 2 && mode
== DFmode
))
7236 regno
= FIRST_SSE_REG
;
7239 /* OImode shouldn't be used directly. */
7240 gcc_assert (mode
!= OImode
);
7242 return gen_rtx_REG (orig_mode
, regno
);
7246 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7251 /* Handle libcalls, which don't provide a type node. */
7252 if (valtype
== NULL
)
7266 regno
= FIRST_SSE_REG
;
7270 regno
= FIRST_FLOAT_REG
;
7278 return gen_rtx_REG (mode
, regno
);
7280 else if (POINTER_TYPE_P (valtype
))
7282 /* Pointers are always returned in word_mode. */
7286 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7287 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7288 x86_64_int_return_registers
, 0);
7290 /* For zero sized structures, construct_container returns NULL, but we
7291 need to keep rest of compiler happy by returning meaningful value. */
7293 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
static rtx
function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 16:
	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode))
	    regno = FIRST_SSE_REG;
	  break;
	case 8:
	case 4:
	  if (mode == SFmode || mode == DFmode)
	    regno = FIRST_SSE_REG;
	  break;
	default:
	  break;
	}
    }
  return gen_rtx_REG (orig_mode, regno);
}
7325 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7326 enum machine_mode orig_mode
, enum machine_mode mode
)
7328 const_tree fn
, fntype
;
7331 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7332 fn
= fntype_or_decl
;
7333 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7335 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7336 return function_value_ms_64 (orig_mode
, mode
);
7337 else if (TARGET_64BIT
)
7338 return function_value_64 (orig_mode
, mode
, valtype
);
7340 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7344 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7345 bool outgoing ATTRIBUTE_UNUSED
)
7347 enum machine_mode mode
, orig_mode
;
7349 orig_mode
= TYPE_MODE (valtype
);
7350 mode
= type_natural_mode (valtype
, NULL
);
7351 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
/* Pointer function arguments and return values are promoted to
   word_mode.  */

static enum machine_mode
ix86_promote_function_mode (const_tree type, enum machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (type != NULL_TREE && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}

static rtx
ix86_libcall_value (enum machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
/* Return true iff type is returned in memory.  */

static bool ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size;

  if (mode == BLKmode)
    return true;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return false;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return false;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exist or the ABI prescribes otherwise.  */
      if (size == 8)
	return !TARGET_MMX || TARGET_VECT8_RETURNS;

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return !TARGET_SSE;

      /* AVX values are returned in YMM0, except when it doesn't exist.  */
      if (size == 32)
	return !TARGET_AVX;
    }

  if (mode == XFmode)
    return false;

  if (size > 12)
    return true;

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return false;
}

static bool ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
  int needed_intregs, needed_sseregs;
  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}

static bool ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size = int_size_in_bytes (type);

  /* __m128 is returned in xmm0.  */
  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
      && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
    return false;

  /* Otherwise, the size must be exactly in [1248].  */
  return size != 1 && size != 2 && size != 4 && size != 8;
}
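/* Worked example (not from the original source): under the MS x64
   checks above,

       struct a { double d; };        -- size 8  -> returned in a register
       struct b { int i; double d; } -- size 16 -> returned in memory
       __m128 v;                      -- 16-byte vector mode -> XMM0

   only 1-, 2-, 4- and 8-byte aggregates come back by value; the
   16-byte exception is reserved for vector/scalar-integer modes such
   as __m128.  */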
static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_RETURN_IN_MEMORY
  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
  const enum machine_mode mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return return_in_memory_ms_64 (type, mode);
      else
	return return_in_memory_64 (type, mode);
    }
  return return_in_memory_32 (type, mode);
#endif
}
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */
7477 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7479 static bool warnedsse
, warnedmmx
;
7481 if (!TARGET_64BIT
&& type
)
7483 /* Look at the return type of the function, not the function type. */
7484 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7486 if (!TARGET_SSE
&& !warnedsse
)
7489 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7492 warning (0, "SSE vector return without SSE enabled "
7497 if (!TARGET_MMX
&& !warnedmmx
)
7499 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7502 warning (0, "MMX vector return without MMX enabled "
/* Create the va_list data type.  */

/* Returns the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */

static tree
ix86_build_builtin_va_list_abi (enum calling_abi abi)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT || abi == MS_ABI)
    return build_pointer_type (char_type_node);

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
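/* Illustrative equivalent (not from the original source): the record
   built above matches the SysV x86-64 ABI declaration

       typedef struct __va_list_tag {
	 unsigned int gp_offset;
	 unsigned int fp_offset;
	 void *overflow_arg_area;
	 void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];

   The one-element array is what makes va_list decay to a pointer when
   passed between functions.  */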
7564 /* Setup the builtin va_list data type and for 64-bit the additional
7565 calling convention specific va_list data types. */
7568 ix86_build_builtin_va_list (void)
7570 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7572 /* Initialize abi specific va_list builtin types. */
7576 if (ix86_abi
== MS_ABI
)
7578 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7579 if (TREE_CODE (t
) != RECORD_TYPE
)
7580 t
= build_variant_type_copy (t
);
7581 sysv_va_list_type_node
= t
;
7586 if (TREE_CODE (t
) != RECORD_TYPE
)
7587 t
= build_variant_type_copy (t
);
7588 sysv_va_list_type_node
= t
;
7590 if (ix86_abi
!= MS_ABI
)
7592 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7593 if (TREE_CODE (t
) != RECORD_TYPE
)
7594 t
= build_variant_type_copy (t
);
7595 ms_va_list_type_node
= t
;
7600 if (TREE_CODE (t
) != RECORD_TYPE
)
7601 t
= build_variant_type_copy (t
);
7602 ms_va_list_type_node
= t
;
7609 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7612 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7618 /* GPR size of varargs save area. */
7619 if (cfun
->va_list_gpr_size
)
7620 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7622 ix86_varargs_gpr_size
= 0;
7624 /* FPR size of varargs save area. We don't need it if we don't pass
7625 anything in SSE registers. */
7626 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7627 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7629 ix86_varargs_fpr_size
= 0;
7631 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
7634 save_area
= frame_pointer_rtx
;
7635 set
= get_varargs_alias_set ();
7637 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
7638 if (max
> X86_64_REGPARM_MAX
)
7639 max
= X86_64_REGPARM_MAX
;
7641 for (i
= cum
->regno
; i
< max
; i
++)
7643 mem
= gen_rtx_MEM (word_mode
,
7644 plus_constant (save_area
, i
* UNITS_PER_WORD
));
7645 MEM_NOTRAP_P (mem
) = 1;
7646 set_mem_alias_set (mem
, set
);
7647 emit_move_insn (mem
,
7648 gen_rtx_REG (word_mode
,
7649 x86_64_int_parameter_registers
[i
]));
7652 if (ix86_varargs_fpr_size
)
7654 enum machine_mode smode
;
7657 /* Now emit code to save SSE registers. The AX parameter contains number
7658 of SSE parameter registers used to call this function, though all we
7659 actually check here is the zero/non-zero status. */
7661 label
= gen_label_rtx ();
7662 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
7663 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
7666 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7667 we used movdqa (i.e. TImode) instead? Perhaps even better would
7668 be if we could determine the real mode of the data, via a hook
7669 into pass_stdarg. Ignore all that for now. */
7671 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
7672 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
7674 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
7675 if (max
> X86_64_SSE_REGPARM_MAX
)
7676 max
= X86_64_SSE_REGPARM_MAX
;
7678 for (i
= cum
->sse_regno
; i
< max
; ++i
)
7680 mem
= plus_constant (save_area
, i
* 16 + ix86_varargs_gpr_size
);
7681 mem
= gen_rtx_MEM (smode
, mem
);
7682 MEM_NOTRAP_P (mem
) = 1;
7683 set_mem_alias_set (mem
, set
);
7684 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
7686 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
7694 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
7696 alias_set_type set
= get_varargs_alias_set ();
7699 /* Reset to zero, as there might be a sysv vaarg used
7701 ix86_varargs_gpr_size
= 0;
7702 ix86_varargs_fpr_size
= 0;
7704 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
7708 mem
= gen_rtx_MEM (Pmode
,
7709 plus_constant (virtual_incoming_args_rtx
,
7710 i
* UNITS_PER_WORD
));
7711 MEM_NOTRAP_P (mem
) = 1;
7712 set_mem_alias_set (mem
, set
);
7714 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
7715 emit_move_insn (mem
, reg
);
7720 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
7721 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
7724 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7725 CUMULATIVE_ARGS next_cum
;
7728 /* This argument doesn't appear to be used anymore. Which is good,
7729 because the old code here didn't suppress rtl generation. */
7730 gcc_assert (!no_rtl
);
7735 fntype
= TREE_TYPE (current_function_decl
);
7737 /* For varargs, we do not want to skip the dummy va_dcl argument.
7738 For stdargs, we do want to skip the last named argument. */
7740 if (stdarg_p (fntype
))
7741 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
7744 if (cum
->call_abi
== MS_ABI
)
7745 setup_incoming_varargs_ms_64 (&next_cum
);
7747 setup_incoming_varargs_64 (&next_cum
);
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
7765 /* Implement va_start. */
7768 ix86_va_start (tree valist
, rtx nextarg
)
7770 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
7771 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7772 tree gpr
, fpr
, ovf
, sav
, t
;
7776 if (flag_split_stack
7777 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7779 unsigned int scratch_regno
;
7781 /* When we are splitting the stack, we can't refer to the stack
7782 arguments using internal_arg_pointer, because they may be on
7783 the old stack. The split stack prologue will arrange to
7784 leave a pointer to the old stack arguments in a scratch
7785 register, which we here copy to a pseudo-register. The split
7786 stack prologue can't set the pseudo-register directly because
7787 it (the prologue) runs before any registers have been saved. */
7789 scratch_regno
= split_stack_prologue_scratch_regno ();
7790 if (scratch_regno
!= INVALID_REGNUM
)
7794 reg
= gen_reg_rtx (Pmode
);
7795 cfun
->machine
->split_stack_varargs_pointer
= reg
;
7798 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
7802 push_topmost_sequence ();
7803 emit_insn_after (seq
, entry_of_function ());
7804 pop_topmost_sequence ();
7808 /* Only 64bit target needs something special. */
7809 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7811 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7812 std_expand_builtin_va_start (valist
, nextarg
);
7817 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
7818 next
= expand_binop (ptr_mode
, add_optab
,
7819 cfun
->machine
->split_stack_varargs_pointer
,
7820 crtl
->args
.arg_offset_rtx
,
7821 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
7822 convert_move (va_r
, next
, 0);
7827 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7828 f_fpr
= DECL_CHAIN (f_gpr
);
7829 f_ovf
= DECL_CHAIN (f_fpr
);
7830 f_sav
= DECL_CHAIN (f_ovf
);
7832 valist
= build_simple_mem_ref (valist
);
7833 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
7834 /* The following should be folded into the MEM_REF offset. */
7835 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
7837 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
7839 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
7841 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
7844 /* Count number of gp and fp argument registers used. */
7845 words
= crtl
->args
.info
.words
;
7846 n_gpr
= crtl
->args
.info
.regno
;
7847 n_fpr
= crtl
->args
.info
.sse_regno
;
7849 if (cfun
->va_list_gpr_size
)
7851 type
= TREE_TYPE (gpr
);
7852 t
= build2 (MODIFY_EXPR
, type
,
7853 gpr
, build_int_cst (type
, n_gpr
* 8));
7854 TREE_SIDE_EFFECTS (t
) = 1;
7855 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7858 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7860 type
= TREE_TYPE (fpr
);
7861 t
= build2 (MODIFY_EXPR
, type
, fpr
,
7862 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
7863 TREE_SIDE_EFFECTS (t
) = 1;
7864 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7867 /* Find the overflow area. */
7868 type
= TREE_TYPE (ovf
);
7869 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7870 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
7872 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
7873 t
= make_tree (type
, ovf_rtx
);
7875 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
7876 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
7877 TREE_SIDE_EFFECTS (t
) = 1;
7878 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7880 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
7882 /* Find the register save area.
7883 Prologue of the function save it right above stack frame. */
7884 type
= TREE_TYPE (sav
);
7885 t
= make_tree (type
, frame_pointer_rtx
);
7886 if (!ix86_varargs_gpr_size
)
7887 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
7888 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
7889 TREE_SIDE_EFFECTS (t
) = 1;
7890 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
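/* Worked example (not from the original source): for

       void f (int n, ...)

   entered with one named integer argument and no named SSE arguments,
   the stores emitted above amount to

       ap->gp_offset = 1 * 8;           -- one GPR already consumed
       ap->fp_offset = 8 * 8 + 0 * 16;  -- 48: past all eight GPR slots
       ap->overflow_arg_area = <incoming stack arguments>;
       ap->reg_save_area = <area filled by setup_incoming_varargs_64>;

   matching the gp_offset/fp_offset layout established by the va_list
   builders above.  */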
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
                      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;
  unsigned int arg_boundary;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
                build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
  valist = build_va_arg_indirect_ref (valist);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type, NULL);
  switch (nat_mode)
    {
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
        {
          container = NULL;
          break;
        }

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
                                       type, 0, X86_64_REGPARM_MAX,
                                       X86_64_SSE_REGPARM_MAX, intreg,
                                       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
                   && ((needed_intregs && TYPE_ALIGN (type) > 64)
                       || TYPE_ALIGN (type) > 128));

      /* If we are passing a structure, verify that it occupies a consecutive
         block of the register save area.  If not, we need to do moves.  */
      if (!need_temp && !REG_P (container))
        {
          /* Verify that all registers are strictly consecutive.  */
          if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 16)
                    need_temp = true;
                }
            }
          else
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 8)
                    need_temp = true;
                }
            }
        }
      if (!need_temp)
        {
          int_addr = addr;
          sse_addr = addr;
        }
      else
        {
          int_addr = create_tmp_var (ptr_type_node, "int_addr");
          sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
        }

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
        {
          t = build_int_cst (TREE_TYPE (gpr),
                             (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
          t = build2 (GE_EXPR, boolean_type_node, gpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          t = build_int_cst (TREE_TYPE (fpr),
                             (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
                             + X86_64_REGPARM_MAX * 8);
          t = build2 (GE_EXPR, boolean_type_node, fpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
        {
          /* int_addr = gpr + sav; */
          t = fold_build_pointer_plus (sav, gpr);
          gimplify_assign (int_addr, t, pre_p);
        }
      if (needed_sseregs)
        {
          /* sse_addr = fpr + sav; */
          t = fold_build_pointer_plus (sav, fpr);
          gimplify_assign (sse_addr, t, pre_p);
        }
      if (need_temp)
        {
          int i, prev_size = 0;
          tree temp = create_tmp_var (type, "va_arg_tmp");

          /* addr = &temp; */
          t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
          gimplify_assign (addr, t, pre_p);

          for (i = 0; i < XVECLEN (container, 0); i++)
            {
              rtx slot = XVECEXP (container, 0, i);
              rtx reg = XEXP (slot, 0);
              enum machine_mode mode = GET_MODE (reg);
              tree piece_type;
              tree addr_type;
              tree daddr_type;
              tree src_addr, src;
              int src_offset;
              tree dest_addr, dest;
              int cur_size = GET_MODE_SIZE (mode);

              gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
              prev_size = INTVAL (XEXP (slot, 1));
              if (prev_size + cur_size > size)
                {
                  cur_size = size - prev_size;
                  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
                  if (mode == BLKmode)
                    mode = QImode;
                }
              piece_type = lang_hooks.types.type_for_mode (mode, 1);
              if (mode == GET_MODE (reg))
                addr_type = build_pointer_type (piece_type);
              else
                addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
                                                         true);
              daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
                                                        true);

              if (SSE_REGNO_P (REGNO (reg)))
                {
                  src_addr = sse_addr;
                  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
                }
              else
                {
                  src_addr = int_addr;
                  src_offset = REGNO (reg) * 8;
                }
              src_addr = fold_convert (addr_type, src_addr);
              src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

              dest_addr = fold_convert (daddr_type, addr);
              dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
              if (cur_size == GET_MODE_SIZE (mode))
                {
                  src = build_va_arg_indirect_ref (src_addr);
                  dest = build_va_arg_indirect_ref (dest_addr);

                  gimplify_assign (dest, src, pre_p);
                }
              else
                {
                  tree copy
                    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
                                       3, dest_addr, src_addr,
                                       size_int (cur_size));
                  gimplify_and_add (copy, pre_p);
                }
              prev_size += cur_size;
            }
        }

      if (needed_intregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
                      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
          gimplify_assign (gpr, t, pre_p);
        }

      if (needed_sseregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
                      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
          gimplify_assign (fpr, t, pre_p);
        }

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align a parameter on the stack for the caller, if its
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
     here with the caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
                    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
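
/* The five strings above are decimal expansions of log10(2), ln(2),
   log2(e), log2(10) and pi -- i.e. exactly the values loadable by the
   x87 fldlg2, fldln2, fldl2e, fldl2t and fldpi opcodes named in the
   trailing comments.  */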
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);
  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
        init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
        if (real_identical (&r, &ext_80387_constants_table[i]))
          return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
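
/* To summarize the convention used by standard_80387_constant_p above:
   0 means "not special", 1 is 0.0 (fldz), 2 is 1.0 (fld1), 3..7 index
   the table of extra constants, and the last two values flag -0.0 and
   -1.0, which are loaded as the fldz;fchs and fld1;fchs sequences.  */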
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
                                       XFmode);
}
/* Return 1 if X is all 0s and 2 if X is all 1s
   in a supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode))
    return 2;

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      switch (get_attr_mode (insn))
        {
        case MODE_TI:
          if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "%vpxor\t%0, %d0";
        case MODE_V2DF:
          if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "%vxorpd\t%0, %d0";
        case MODE_V4SF:
          return "%vxorps\t%0, %d0";

        case MODE_OI:
          if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "vpxor\t%x0, %x0, %x0";
        case MODE_V4DF:
          if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "vxorpd\t%x0, %x0, %x0";
        case MODE_V8SF:
          return "vxorps\t%x0, %x0, %x0";

        default:
          break;
        }

    case 2:
      if (TARGET_AVX)
        return "vpcmpeqd\t%0, %0, %0";
      else
        return "pcmpeqd\t%0, %0";

    default:
      break;
    }
  gcc_unreachable ();
}
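
/* A rough picture of what the templates above expand to: for an all-zero
   V4SF value in %xmm2 the SSE path prints "xorps %xmm2, %xmm2" -- the
   packed-single form is one byte shorter than pxor/xorpd (no 0x66 prefix)
   and is preferred on targets where packed-single ops are no slower,
   which is what TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL encodes -- while
   the AVX path uses the three-operand "vxorps %xmm2, %xmm2, %xmm2".
   All-ones constants become [v]pcmpeqd of a register with itself.  */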
/* Returns true if OP contains a symbol reference.  */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (op, i) - 1; j >= 0; j--)
            if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
              return true;
        }
      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
        return true;
    }

  return false;
}
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return false;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return false;

  ix86_compute_frame_layout (&frame);
  return (frame.stack_pointer_offset == UNITS_PER_WORD
          && (frame.nregs + frame.nsseregs) == 0);
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf
          || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

static int pic_labels_used;
/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  unsigned int regno;

  for (regno = AX_REG; regno <= SP_REG; regno++)
    {
      char name[32];
      tree decl;

      if (!(pic_labels_used & (1 << regno)))
        continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                         get_identifier (name),
                         build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
                                       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
        {
          switch_to_section (darwin_sections[text_coal_section]);
          fputs ("\t.weak_definition\t", asm_out_file);
          assemble_name (asm_out_file, name);
          fputs ("\n\t.private_extern\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_OUTPUT_LABEL (asm_out_file, name);
          DECL_WEAK (decl) = 1;
        }
      else
#endif
      if (USE_HIDDEN_LINKONCE)
        {
          DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);

          targetm.asm_out.unique_section (decl, 0);
          switch_to_section (get_named_section (decl, NULL, 0));

          targetm.asm_out.globalize_label (asm_out_file, name);
          fputs ("\t.hidden\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
        }
      else
        {
          switch_to_section (text_section);
          ASM_OUTPUT_LABEL (asm_out_file, name);
        }

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      init_function_start (decl);
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
         as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
        {
          int i = 8;

          while (i--)
            fputs ("\tnop\n", asm_out_file);
        }

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
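
/* For reference, the thunk emitted above for %ebx looks like:

       __x86.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. it copies its own return address (the address of the insn
   following the call) into the destination register.  */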
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
         Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
         an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (!flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
        ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
#endif

      targetm.asm_out.internal_label (asm_out_file, "L",
                                      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
        ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
      else
        targetm.asm_out.internal_label (asm_out_file, "L",
                                        CODE_LABEL_NUMBER (label));
#endif
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
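
/* Typical output of output_set_got for -fpic on an ELF target, with
   %ebx as DEST:

               call    __x86.get_pc_thunk.bx
               addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   relying on the special relocation semantics that the assembler gives
   to _GLOBAL_OFFSET_TABLE_ as an add immediate.  (Illustrative sketch,
   not from the original file.)  */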
/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
                      gen_rtx_MEM (word_mode,
                                   gen_rtx_PRE_DEC (Pmode,
                                                    stack_pointer_rtx)),
                      arg);
}

/* Generate a "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
                      arg,
                      gen_rtx_MEM (word_mode,
                                   gen_rtx_POST_INC (Pmode,
                                                     stack_pointer_rtx)));
}
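
/* The patterns built by gen_push and gen_pop correspond to the RTL

       (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI arg))   ;; push
       (set (reg:SI arg) (mem:SI (post_inc:SI (reg:SI sp))))  ;; pop

   shown here for the 32-bit case, where word_mode and Pmode are both
   SImode.  */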
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
        drap = REGNO (crtl->drap_reg);
      else
        drap = -1;
      for (i = 2; i >= 0; --i)
        if (i != drap && !df_regs_ever_live_p (i))
          return i;
    }

  return INVALID_REGNUM;
}
/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
          || crtl->profile
          || crtl->calls_eh_return
          || crtl->uses_const_pool))
    return ix86_select_alt_pic_regnum () == INVALID_REGNUM;

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return true;
        }
    }

  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
    return true;

  return (df_regs_ever_live_p (regno)
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}

/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
             && to == HARD_FRAME_POINTER_REGNUM)
            || (from == FRAME_POINTER_REGNUM
                && to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}
/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
/* Fill structure ix86_frame with the frame data of the currently
   computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  /* The 64-bit MS ABI seems to require stack alignment to be always 16,
     except in function prologues and in leaf functions.  */
  if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
      && (!current_function_is_leaf || cfun->calls_alloca != 0
          || ix86_current_function_calls_tls_descriptor))
    {
      preferred_alignment = 16;
      stack_alignment_needed = 16;
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    cfun->machine->use_fast_prologue_epilogue = false;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when the amount of
     registers didn't change, as reload does multiple calls to the function
     and does not expect the decision to change within a single iteration.  */
  else if (!optimize_function_for_size_p (cfun)
           && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_get_node (current_function_decl);

      cfun->machine->use_fast_prologue_epilogue_nregs = count;

      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

         Be careful about choosing what prologue to emit:  When a function
         takes many instructions to execute we may use the slow version, as
         well as when the function is known to be outside a hot spot (this
         is known with feedback only).  Weight the size of the function by
         the number of registers to save, as it is cheap to use one or two
         push instructions but very slow to use many of them.  */
      if (count)
        count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
          || (flag_branch_probabilities
              && node->frequency < NODE_FREQUENCY_HOT))
        cfun->machine->use_fast_prologue_epilogue = false;
      else
        cfun->machine->use_fast_prologue_epilogue
          = !expensive_function_p (count);
    }

  frame->save_regs_using_mov
    = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
       /* If static stack checking is enabled and done with probes,
          the registers need to be saved before allocating the frame.  */
       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);

  /* Skip return address.  */
  offset = UNITS_PER_WORD;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* Align and set SSE register save area.  */
  if (frame->nsseregs)
    {
      /* The only ABI that has saved SSE registers (Win64) also has a
         16-byte aligned default stack, and thus we don't need to be
         within the re-aligned local stack frame to save them.  */
      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
      offset = (offset + 16 - 1) & -16;
      offset += frame->nsseregs * 16;
    }
  frame->sse_reg_save_offset = offset;

  /* The re-aligned stack starts here.  Values before this point are not
     directly comparable with values below this point.  In order to make
     sure that no value happens to be the same before and after, force
     the alignment computation below to add a non-zero value.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed) & -stack_alignment_needed;

  /* Va-arg area */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
  offset += frame->va_arg_size;

  /* Align start of frame for local function.  */
  if (stack_realign_fp
      || offset != frame->sse_reg_save_offset
      || size != 0
      || !current_function_is_leaf
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when the function calls alloca.  The
     alloca expander assumes that the last crtl->outgoing_args_size bytes
     of the stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || cfun->calls_alloca
          || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + preferred_alignment - 1) & -preferred_alignment;

  /* We've reached the end of the stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size the prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && current_function_sp_is_unchanging
      && current_function_is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
        frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff > 240 || (diff & 15) != 0)
        {
          /* Ideally we'd determine what portion of the local stack frame
             (within the constraint of the lowest 240) is most heavily used.
             But without that complication, simply bias the frame pointer
             by 128 bytes so as to maximize the amount of the local stack
             frame that is addressable with 8-bit offsets.  */
          frame->hard_frame_pointer_offset
            = frame->stack_pointer_offset - 128;
        }
    }
}
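
/* A worked example of the computation above, assuming a 32-bit non-leaf
   function (UNITS_PER_WORD == 4), frame pointer needed, two call-saved
   registers, 20 bytes of locals, no SSE saves, no varargs, no stack
   realignment, stack_alignment_needed == 4 and preferred_alignment == 16:

       offset =  4   past the return address
       offset =  8   past saved %ebp (hfp_save_offset
                     = hard_frame_pointer_offset = 8)
       offset = 16   past the two register saves (reg_save_offset = 16,
                     sse_reg_save_offset = 16, frame_pointer_offset = 16)
       offset = 36   past the 20 bytes of locals
       offset = 48   after rounding up to preferred_alignment
                     (stack_pointer_offset = 48)

   so the prologue must allocate to_allocate = 48 - 16 = 32 bytes beyond
   the pushes.  (Illustrative numbers only, not from the original file.)  */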
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static inline int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
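
/* The resulting encoding lengths, in bytes of addressing overhead
   beyond the mod/rm byte itself:

       offset == 0           : 0, except 1 for %ebp/%r13, which always
                               require a displacement byte
       -128 <= offset <= 127 : 1 (disp8)
       otherwise             : 4 (disp32)

   plus 1 more for %esp/%r12, which always require a SIB byte.  */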
/* Return an RTX that points to CFA_OFFSET within the stack frame.
   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset)
{
  const struct machine_function *m = cfun->machine;
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
         opportunities.  Generally FP is valid throughout the function,
         while DRAP must be reloaded within the epilogue.  But choose either
         over the SP due to increased encoding size.  */

      if (m->fs.fp_valid)
        {
          base_reg = hard_frame_pointer_rtx;
          base_offset = m->fs.fp_offset - cfa_offset;
        }
      else if (m->fs.drap_valid)
        {
          base_reg = crtl->drap_reg;
          base_offset = 0 - cfa_offset;
        }
      else if (m->fs.sp_valid)
        {
          base_reg = stack_pointer_rtx;
          base_offset = m->fs.sp_offset - cfa_offset;
        }
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
         With a tie, choose FP > DRAP > SP.  */
      if (m->fs.sp_valid)
        {
          base_reg = stack_pointer_rtx;
          base_offset = m->fs.sp_offset - cfa_offset;
          len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
        }
      if (m->fs.drap_valid)
        {
          toffset = 0 - cfa_offset;
          tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
          if (tlen <= len)
            {
              base_reg = crtl->drap_reg;
              base_offset = toffset;
              len = tlen;
            }
        }
      if (m->fs.fp_valid)
        {
          toffset = m->fs.fp_offset - cfa_offset;
          tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
          if (tlen <= len)
            {
              base_reg = hard_frame_pointer_rtx;
              base_offset = toffset;
              len = tlen;
            }
        }
    }
  gcc_assert (base_reg != NULL);

  return plus_constant (base_reg, base_offset);
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
                              HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;

  addr = choose_baseaddr (cfa_offset);
  mem = gen_frame_mem (mode, addr);

  /* For SSE saves, we need to indicate the 128-bit alignment.  */
  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
        {
          /* A bit of a hack.  We force the DRAP register to be saved in
             the re-aligned stack frame, which provides us with a copy
             of the CFA that will last past the prologue.  Install it.  */
          gcc_checking_assert (cfun->machine->fs.fp_valid);
          addr = plus_constant (hard_frame_pointer_rtx,
                                cfun->machine->fs.fp_offset - cfa_offset);
          mem = gen_rtx_MEM (mode, addr);
          add_reg_note (insn, REG_CFA_DEF_CFA, mem);
        }
      else
        {
          /* The frame pointer is a stable reference within the
             aligned frame.  Use it.  */
          gcc_checking_assert (cfun->machine->fs.fp_valid);
          addr = plus_constant (hard_frame_pointer_rtx,
                                cfun->machine->fs.fp_offset - cfa_offset);
          mem = gen_rtx_MEM (mode, addr);
          add_reg_note (insn, REG_CFA_EXPRESSION,
                        gen_rtx_SET (VOIDmode, mem, reg));
        }
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
        cfa_offset -= UNITS_PER_WORD;
      }
}

/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
        cfa_offset -= 16;
      }
}
static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next
   stack manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within the stack red-zone till return, as unwinders can find the same
   value in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}
/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;
  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame
   related, zero if the %r11 register is live and cannot be freely used,
   and positive otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
                           int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx insn;
  bool add_frame_related_expr = false;

  if (Pmode == SImode)
    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
  else
    {
      rtx tmp;
      /* r11 is used by indirect sibcall return as well, set before the
         epilogue and used after the epilogue.  */
      if (style)
        tmp = gen_rtx_REG (DImode, R11_REG);
      else
        {
          gcc_assert (src != hard_frame_pointer_rtx
                      && dest != hard_frame_pointer_rtx);
          tmp = hard_frame_pointer_rtx;
        }
      insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
      if (style < 0)
        add_frame_related_expr = true;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
    }

  insn = emit_insn (insn);
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (VOIDmode, dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
        {
          rtx r = gen_rtx_PLUS (Pmode, src, offset);
          r = gen_rtx_SET (VOIDmode, dest, r);
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
        }
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;

      if (src == hard_frame_pointer_rtx)
        {
          valid = m->fs.fp_valid;
          ooffset = m->fs.fp_offset;
        }
      else if (src == crtl->drap_reg)
        {
          valid = m->fs.drap_valid;
          ooffset = 0;
        }
      else
        {
          /* Else there are two possibilities: SP itself, which we set
             up as the default above, or EH_RETURN_STACKADJ_RTX, which is
             taken care of by hand along the eh_return path.  */
          gcc_checking_assert (src == stack_pointer_rtx
                               || offset == const0_rtx);
        }

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
    }
}
/* Find an available register to be used as a dynamic realign argument
   pointer register.  Such a register is written in the prologue and
   used early in the body, so it must not be
        1. a parameter-passing register, or
        2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 even though it has
   a longer encoding.

   Return: the regno of the chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  if (TARGET_64BIT)
    {
      /* Use R13 for a nested function or a function that needs a static
         chain.  Since a function with a tail call may use any caller-saved
         registers in its epilogue, DRAP must not use a caller-saved
         register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
        return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for a nested function or a function that needs a static
         chain.  Since a function with a tail call may use any caller-saved
         registers in its epilogue, DRAP must not use a caller-saved
         register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
        return DI_REG;

      /* Reuse the static chain register if it isn't used for parameter
         passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
        {
          unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
          if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
            return CX_REG;
        }
      return DI_REG;
    }
}
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Prefer the one specified at command line.  */
  if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for the incoming stack boundary
     if -mstackrealign is used; this isn't done for the sibcall check,
     and the estimated stack alignment is 128 bits.  */
  else if (!sibcall
           && !TARGET_64BIT
           && ix86_force_align_arg_pointer
           && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via the force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary.  */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for the register save
     area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and return vDRAP.  */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx seq, insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
        {
          add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      return drap_vreg;
    }
  else
    return NULL;
}
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
        = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      unsigned int drap_regno
        = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
          for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
          && drap_regno != AX_REG)
        regno = AX_REG;
      else if (regparm < 2 && drap_regno != DX_REG)
        regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !static_chain_p
               && drap_regno != CX_REG)
        regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true))
        regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
               && ix86_save_reg (SI_REG, true))
        regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true))
        regno = DI_REG;
      else
        {
          regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
          sr->saved = true;
        }
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx x, insn = emit_insn (gen_pop (sr->reg));

      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */

static void
ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
{
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 11 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
  if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
         values of N from 1 until it exceeds SIZE.  If only one probe is
         needed, this will not generate any code.  Then adjust and probe
         to PROBE_INTERVAL + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          if (first_probe)
            {
              adjust = 2 * PROBE_INTERVAL + dope;
              first_probe = false;
            }
          else
            adjust = PROBE_INTERVAL;

          emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                  plus_constant (stack_pointer_rtx,
                                                 -adjust)));
          emit_stack_probe (stack_pointer_rtx);
        }

      if (first_probe)
        adjust = size + PROBE_INTERVAL + dope;
      else
        adjust = size + PROBE_INTERVAL - i;

      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              plus_constant (stack_pointer_rtx, -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (stack_pointer_rtx,
                                                    PROBE_INTERVAL + dope)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              plus_constant (stack_pointer_rtx,
                                             - (PROBE_INTERVAL + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      emit_move_insn (sr.reg, GEN_INT (-rounded_size));
      emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
                              gen_rtx_PLUS (Pmode, sr.reg,
                                            stack_pointer_rtx)));


      /* Step 3: the loop

         while (SP != LAST_ADDR)
           {
             SP = SP + PROBE_INTERVAL
             probe at SP
           }

         adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
         values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));


      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
         assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        {
          emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                  plus_constant (stack_pointer_rtx,
                                                 rounded_size - size)));
          emit_stack_probe (stack_pointer_rtx);
        }

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (stack_pointer_rtx,
                                                    PROBE_INTERVAL + dope)));

      release_scratch_register_on_entry (&sr);
    }

  gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);

  /* Even if the stack pointer isn't the CFA register, we need to correctly
     describe the adjustments made to it, in particular differentiate the
     frame-related ones from the frame-unrelated ones.  */
  if (size > 0)
    {
      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
      XVECEXP (expr, 0, 0)
        = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                       plus_constant (stack_pointer_rtx, -size));
      XVECEXP (expr, 0, 1)
        = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                       plus_constant (stack_pointer_rtx,
                                      PROBE_INTERVAL + dope + size));
      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
      RTX_FRAME_RELATED_P (last) = 1;
    }

  cfun->machine->fs.sp_offset += size;

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
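
/* A small numeric trace of the constant case above: with PROBE_INTERVAL
   == 4096, dope == 16 and size == 10000, the loop runs for i = 4096 and
   i = 8192 (leaving i == 12288), emitting roughly

       sub  $8208, %rsp ; probe     (2*4096 + 16 on the first iteration)
       sub  $4096, %rsp ; probe
       sub  $1808, %rsp ; probe     (10000 + 4096 - 12288)
       add  $4112, %rsp             (gives back PROBE_INTERVAL + dope)

   for a net adjustment of exactly 10000 bytes, with probes no more than
   a probe interval apart.  (Illustrative trace, not part of the original
   file.)  */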
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 7 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
         it exceeds SIZE.  If only one probe is needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));

      emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

         while (TEST_ADDR != LAST_ADDR)
           {
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
             probe at TEST_ADDR
           }

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
                                                       stack_pointer_rtx,
                                                       sr.reg),
                                         rounded_size - size));

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
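
/* Assembled, the loop printed above looks roughly like this (32-bit,
   TEST_ADDR in %eax, LAST_ADDR in %edx, the usual 4096-byte interval):

       .LPSRL0:
               cmpl    %edx, %eax
               je      .LPSRE0
               subl    $4096, %eax
               orl     $0, (%esp,%eax)
               jmp     .LPSRL0
       .LPSRE0:

   where the `or' of zero is a cheap read-modify-write that touches the
   guard page without changing memory.  (Sketch only; the exact label
   names come from ASM_GENERATE_INTERNAL_LABEL.)  */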
/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
   to be generated in correct form.  */

static void
ix86_finalize_stack_realign_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_realign = (incoming_stack_boundary
				< (current_function_is_leaf
				   ? crtl->max_used_stack_slot_alignment
				   : crtl->stack_alignment_needed));

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed, but in the end nothing that
     needed the stack alignment had been spilled, clear frame_pointer_needed
     and say we don't need stack realignment.  */
  if (stack_realign
      && frame_pointer_needed
      && current_function_is_leaf
      && flag_omit_frame_pointer
      && current_function_sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      && !(flag_stack_check && STACK_CHECK_MOVING_SP)
      && !ix86_frame_pointer_required ()
      && get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      HARD_REG_SET set_up_by_prologue, prologue_used;
      basic_block bb;

      CLEAR_HARD_REG_SET (prologue_used);
      CLEAR_HARD_REG_SET (set_up_by_prologue);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
			   HARD_FRAME_POINTER_REGNUM);
      FOR_EACH_BB (bb)
	{
	  rtx insn;
	  FOR_BB_INSNS (bb, insn)
	    if (NONDEBUG_INSN_P (insn)
		&& requires_stack_frame_p (insn, prologue_used,
					   set_up_by_prologue))
	      {
		crtl->stack_realign_needed = stack_realign;
		crtl->stack_realign_finalized = true;
		return;
	      }
	}

      frame_pointer_needed = false;
      stack_realign = false;
      crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
      crtl->stack_alignment_needed = incoming_stack_boundary;
      crtl->stack_alignment_estimated = incoming_stack_boundary;
      if (crtl->preferred_stack_boundary > incoming_stack_boundary)
	crtl->preferred_stack_boundary = incoming_stack_boundary;
      df_finish_pass (true);
      df_scan_alloc (NULL);
      df_scan_blocks ();
      df_compute_regs_ever_live (true);
    }

  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
}
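/* Illustrative example (not from the sources): a leaf function such as

	int f (int x) { return x + 1; }

   may have had frame_pointer_needed set only because we conservatively
   assumed stack realignment might be required; since nothing
   alignment-sensitive was spilled, the code above clears both the frame
   pointer and the realignment request and the function gets a plain,
   frame-less prologue.  */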
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;

  ix86_finalize_stack_realign_flags ();

  /* DRAP should not coexist with stack_realign_fp.  */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;

  ix86_compute_frame_layout (&frame);

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
	 ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use the
	 profiling-before-prologue variant.  If so, sorry.  */
      if (crtl->profile && flag_fentry != 0)
	sorry ("ms_hook_prologue attribute isn%'t compatible "
	       "with -mfentry for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
	 8b ff		movl.s %edi,%edi
	 55		push   %ebp
	 8b ec		movl.s %esp,%ebp

	 This matches the hookable function prologue in Win32 API
	 functions in Microsoft Windows XP Service Pack 2 and newer.
	 Wine uses this to enable Windows apps to hook the Win32 API
	 functions provided by Wine.

	 What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
	  && !(crtl->drap_reg && crtl->stack_realign_needed))
	{
	  rtx push, mov;

	  /* We've decided to use the frame pointer already set up.
	     Describe this to the unwinder by pretending that both
	     push and mov insns happen right here.

	     Putting the unwind info here at the end of the ms_hook
	     is done so that we can make absolutely certain we get
	     the required byte sequence at the start of the function,
	     rather than relying on an assembler that can produce
	     the exact encoding required.

	     However it does mean (in the unpatched case) that we have
	     a 1 insn window where the asynchronous unwind info is
	     incorrect.  However, if we placed the unwind info at
	     its correct location we would have incorrect unwind info
	     in the patched case.  Which is probably all moot since
	     I don't expect Wine generates dwarf2 unwind info for the
	     system libraries that use this feature.  */

	  insn = emit_insn (gen_blockage ());

	  push = gen_push (hard_frame_pointer_rtx);
	  mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
			     stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (push) = 1;
	  RTX_FRAME_RELATED_P (mov) = 1;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

	  /* Note that gen_push incremented m->fs.cfa_offset, even
	     though we didn't emit the push insn here.  */
	  m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.cfa_offset;
	  m->fs.fp_valid = true;
	}
      else
	{
	  /* The frame pointer is not needed so pop %ebp again.
	     This leaves us with a pristine state.  */
	  emit_insn (gen_pop (hard_frame_pointer_rtx));
	}
    }

  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
	 only as a stack adjustment.  The real copy of the register as
	 a save will be done later, if needed.  */
      t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  /* Emit prologue code to adjust stack alignment and set up DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	{
	  /* Push arg pointer reg.  */
	  insn = emit_insn (gen_push (crtl->drap_reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Grab the argument pointer.  */
      t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that the return
	 address can be reached via (argp - 1) slot.  This is needed
	 to implement macro RETURN_ADDR_RTX and intrinsic function
	 expand_builtin_return_addr etc.  */
      t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
	 we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;
    }
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
	 slower on all targets.  Also sdb doesn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
	{
	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.sp_offset;
	  m->fs.fp_valid = true;
	}
    }

  int_registers_saved = (frame.nregs == 0);

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      /* When using the red zone we may start register saving before
	 allocating the stack frame, saving one cycle of the prologue.
	 However, avoid doing this if we have to probe the stack; at least
	 on x86_64 the stack probe can turn into a call that clobbers a
	 red zone location.  */
      else if (ix86_using_red_zone ()
	       && (! TARGET_STACK_PROBE
		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
	{
	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
	  int_registers_saved = true;
	}
    }

  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* The computation of the size of the re-aligned stack frame means
	 that we must allocate the size of the register save area before
	 performing the actual alignment.  Otherwise we cannot guarantee
	 that there's enough storage above the realignment point.  */
      if (m->fs.sp_offset != frame.sse_reg_save_offset)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (m->fs.sp_offset
					    - frame.sse_reg_save_offset),
				   -1, false);

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));

      /* For the purposes of register save area addressing, the stack
	 pointer is no longer valid.  As for the value of sp_offset,
	 see ix86_compute_frame_layout, which we need to match in order
	 to pass verification of stack_pointer_offset at the end.  */
      m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
      m->fs.sp_valid = false;
    }
  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
	{
	  if (ix86_static_chain_on_stack)
	    stack_size += UNITS_PER_WORD;

	  if (!call_used_regs[REGNO (crtl->drap_reg)])
	    stack_size += UNITS_PER_WORD;

	  /* This over-estimates by 1 minimal-stack-alignment-unit but
	     mitigates that by counting in the new return address slot.  */
	  current_function_dynamic_stack_size
	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
	}

      current_function_static_stack_size = stack_size;
    }

  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      /* We expect the registers to be saved when probes are used.  */
      gcc_assert (int_registers_saved);

      if (STACK_CHECK_MOVING_SP)
	{
	  ix86_adjust_stack_and_probe (allocate);
	  allocate = 0;
	}
      else
	{
	  HOST_WIDE_INT size = allocate;

	  if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
	    size = 0x80000000 - STACK_CHECK_PROTECT - 1;

	  if (TARGET_STACK_PROBE)
	    ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
	  else
	    ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
	}
    }
  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      rtx (*adjust_stack_insn) (rtx, rtx, rtx);

      bool eax_live = false;
      bool r10_live = false;

      if (TARGET_64BIT)
	r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
      if (!TARGET_64BIT_MS_ABI)
	eax_live = ix86_eax_live_at_start_p ();

      if (eax_live)
	{
	  emit_insn (gen_push (eax));
	  allocate -= UNITS_PER_WORD;
	}
      if (r10_live)
	{
	  r10 = gen_rtx_REG (Pmode, R10_REG);
	  emit_insn (gen_push (r10));
	  allocate -= UNITS_PER_WORD;
	}

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (Pmode == DImode
			   ? gen_pro_epilogue_adjust_stack_di_sub
			   : gen_pro_epilogue_adjust_stack_si_sub);

      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
					   stack_pointer_rtx, eax));

      /* Note that SEH directives need to continue tracking the stack
	 pointer even after the frame pointer has been set up.  */
      if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
	{
	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_offset += allocate;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (stack_pointer_rtx,
						    -allocate)));
	}
      m->fs.sp_offset += allocate;

      if (r10_live && eax_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
			  gen_frame_mem (word_mode, t));
	  t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
			  gen_frame_mem (word_mode, t));
	}
      else if (eax_live || r10_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn (gen_rtx_REG (word_mode,
				       (eax_live ? AX_REG : R10_REG)),
			  gen_frame_mem (word_mode, t));
	}
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (frame.stack_pointer_offset
				     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (frame.nsseregs)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
      else
	pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
	{
	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx label, tmp_reg;

	      gcc_assert (Pmode == DImode);
	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      tmp_reg = gen_rtx_REG (Pmode, R11_REG);
	      gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
	      insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
						   label));
	      insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
	      insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
					       pic_offset_table_rtx, tmp_reg));
	    }
	  else
	    insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
	}
      else
	{
	  insn = emit_insn (gen_set_got (pic_offset_table_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
	}
    }

  /* In the pic_reg_used case, make sure that the got load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across the
     mcount call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
  if (crtl->profile && !flag_fentry && pic_reg_used)
    emit_insn (gen_prologue_use (pic_offset_table_rtx));

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is set up, but after reload it turns out stack realign
	 isn't necessary; here we emit prologue code to set up DRAP
	 without the stack realign adjustment.  */
      t = choose_baseaddr (0);
      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into the register save push
     sequence when access to the redzone area is done through the frame
     pointer.  The offset between the frame pointer and the stack pointer
     is calculated relative to the value of the stack pointer at the end of
     the function prologue, and moving instructions that access the redzone
     area via the frame pointer inside the push sequence violates this
     assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* Emit cld instruction if stringops are used in the function.  */
  if (TARGET_CLD && ix86_current_function_needs_cld)
    emit_insn (gen_cld ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
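/* For orientation only: in the common 64-bit frame-pointer case, the insns
   emitted above correspond to assembly along the lines of

	pushq	%rbp
	movq	%rsp, %rbp
	subq	$N, %rsp		# N = local frame allocation
	movq	%rbx, -8(%rbp)		# register saves, when using MOVs

   with the DRAP, stack-probing and SEH variations layered on top as the
   conditions above dictate.  */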
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				      GEN_INT (m->fs.cfa_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
/* Emit code and notes for the LEAVE instruction.  */

static void
ix86_emit_leave (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (stack_pointer_rtx, m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
      ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
				 m->fs.fp_offset);
    }
}
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */

static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (word_mode, regno);
	rtx insn, mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_frame_mem (word_mode, mem);
	insn = emit_move_insn (reg, mem);

	if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
	  {
	    /* Previously we'd represented the CFA as an expression
	       like *(%ebp - 8).  We've just popped that value from
	       the stack, which means we need to reset the CFA to
	       the drap register.  This will remain until we restore
	       the stack pointer.  */
	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
	    RTX_FRAME_RELATED_P (insn) = 1;

	    /* This means that the DRAP register is valid for addressing.  */
	    m->fs.drap_valid = true;
	  }
	else
	  ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */

static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_rtx_MEM (V4SFmode, mem);
	set_mem_align (mem, 128);
	emit_move_insn (reg, mem);

	ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= 16;
      }
}
/* Emit vzeroupper if needed.  */

static void
ix86_maybe_emit_epilogue_vzeroupper (void)
{
  if (TARGET_VZEROUPPER
      && !TREE_THIS_VOLATILE (cfun->decl)
      && !cfun->machine->caller_return_avx256_p)
    emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  struct ix86_frame frame;
  bool restore_regs_via_mov;
  bool using_drap;

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);

  m->fs.sp_valid = (!frame_pointer_needed
		    || (current_function_sp_is_unchanging
			&& !stack_realign_fp));
  gcc_assert (!m->fs.sp_valid
	      || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;

      /* When the register save area is in the aligned portion of
	 the stack, determine the maximum runtime displacement that
	 matches up with the aligned frame.  */
      if (stack_realign_drap)
	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
				  + UNITS_PER_WORD);
    }

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    frame.reg_save_offset -= 2 * UNITS_PER_WORD;

  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register is less work
     than reloading sp and popping the register.  */
  else if (!m->fs.sp_valid && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && (frame.nregs > 1
	       || m->fs.sp_offset != frame.reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && !frame.nregs
	   && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && TARGET_USE_LEAVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;
  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
	 the stack pointer, if we will restore via sp.  */
      if (TARGET_64BIT
	  && m->fs.sp_offset > 0x7fffffff
	  && !(m->fs.fp_valid || m->fs.drap_valid)
	  && (frame.nsseregs + frame.nregs) != 0)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.sse_reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
					  style == 2);

  if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
	ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx insn, sa = EH_RETURN_STACKADJ_RTX;

	  /* Stack align doesn't work with eh_return.  */
	  gcc_assert (!stack_realign_drap);
	  /* Neither do regparm nested functions.  */
	  gcc_assert (!ix86_static_chain_on_stack);

	  if (frame_pointer_needed)
	    {
	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, t));

	      t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
	      insn = emit_move_insn (hard_frame_pointer_rtx, t);

	      /* Note that we use SA as a temporary CFA, as the return
		 address is at the proper place relative to it.  We
		 pretend this happens at the FP restore insn because
		 prior to this insn the FP would be stored at the wrong
		 offset relative to SA, and after this insn we have no
		 other reasonable register to use for the CFA.  We don't
		 bother resetting the CFA to the SP for the duration of
		 the return insn.  */
	      add_reg_note (insn, REG_CFA_DEF_CFA,
			    plus_constant (sa, UNITS_PER_WORD));
	      ix86_add_queued_cfa_restore_notes (insn);
	      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
	      RTX_FRAME_RELATED_P (insn) = 1;

	      m->fs.cfa_reg = sa;
	      m->fs.cfa_offset = UNITS_PER_WORD;
	      m->fs.fp_valid = false;

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style, false);
	    }
	  else
	    {
	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
	      insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
	      ix86_add_queued_cfa_restore_notes (insn);

	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	      if (m->fs.cfa_offset != UNITS_PER_WORD)
		{
		  m->fs.cfa_offset = UNITS_PER_WORD;
		  add_reg_note (insn, REG_CFA_DEF_CFA,
				plus_constant (stack_pointer_rtx,
					       UNITS_PER_WORD));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	    }
	  m->fs.sp_offset = UNITS_PER_WORD;
	  m->fs.sp_valid = true;
	}
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
	 if necessary, (2) a sequence of pops, and (3) a return or
	 jump instruction.  Prevent insns from the function body from
	 being scheduled into this sequence.  */
      if (TARGET_SEH)
	{
	  /* Prevent a catch region from being adjacent to the standard
	     epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
	     nor several other flags that would be interesting to test
	     are set up yet.  */
	  if (flag_non_call_exceptions)
	    emit_insn (gen_nops (const1_rtx));
	  else
	    emit_insn (gen_blockage ());
	}

      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!m->fs.sp_valid)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
				     GEN_INT (m->fs.fp_offset
					      - frame.reg_save_offset),
				     style, false);
	}
      else if (m->fs.sp_offset != frame.reg_save_offset)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}

      ix86_emit_restore_regs_using_pop ();
    }

  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
	 pointer store address, then we only need a pop.  */
      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
	 able to grok it fast.  */
      else if (TARGET_USE_LEAVE
	       || optimize_function_for_size_p (cfun)
	       || !cfun->machine->use_fast_prologue_epilogue)
	ix86_emit_leave ();
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style, !using_drap);
	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
	}
    }

  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
	param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
			(VOIDmode, stack_pointer_rtx,
			 gen_rtx_PLUS (Pmode,
				       crtl->drap_reg,
				       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_regs[REGNO (crtl->drap_reg)])
	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }

  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
			       style, true);
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  /* Emit vzeroupper if needed.  */
  ix86_maybe_emit_epilogue_vzeroupper ();

  if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
	 address, do an explicit add, and jump indirectly to the caller.  */
      if (crtl->args.pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
	  rtx insn;

	  /* There is no "pascal" calling convention in any 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  insn = emit_insn (gen_pop (ecx));
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  m->fs.sp_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
	  add_reg_note (insn, REG_CFA_REGISTER,
			gen_rtx_SET (VOIDmode, ecx, pc_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     popc, -1, true);
	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_simple_return_internal ());

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
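/* Again for orientation only: the usual frame-pointer epilogue emitted
   above boils down to either

	leave
	ret

   or, when LEAVE is not profitable on the target,

	movq	%rbp, %rsp
	popq	%rbp
	ret

   with the eh_return, DRAP and popped-args cases handled as above.  */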
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    rtx deleted_debug_label = NULL_RTX;
    while (insn
	   && NOTE_P (insn)
	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      {
	/* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	   notes only; instead set their CODE_LABEL_NUMBER to -1,
	   otherwise there would be code generation differences
	   between -g and -g0.  */
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  deleted_debug_label = insn;
	insn = PREV_INSN (insn);
      }
    if (insn
	&& (LABEL_P (insn)
	    || (NOTE_P (insn)
		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
    else if (deleted_debug_label)
      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  CODE_LABEL_NUMBER (insn) = -1;
  }
#endif
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
	{
	  if (DECL_STATIC_CHAIN (cfun->decl))
	    {
	      sorry ("-fsplit-stack does not support fastcall with "
		     "nested function");
	      return INVALID_REGNUM;
	    }
	  return AX_REG;
	}
      else if (regparm < 3)
	{
	  if (!DECL_STATIC_CHAIN (cfun->decl))
	    return CX_REG;
	  else
	    {
	      if (regparm >= 2)
		{
		  sorry ("-fsplit-stack does not support 2 register "
			 "parameters for a nested function");
		  return INVALID_REGNUM;
		}
	      return DX_REG;
	    }
	}
      else
	{
	  /* FIXME: We could make this work by pushing a register
	     around the addition and comparison.  */
	  sorry ("-fsplit-stack does not support 3 register parameters");
	  return INVALID_REGNUM;
	}
    }
}
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;
/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
  rtx scratch_reg = NULL_RTX;
  rtx varargs_label = NULL_RTX;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
			  UNSPEC_STACK_CHECK);
  limit = gen_rtx_CONST (Pmode, limit);
  limit = gen_rtx_MEM (Pmode, limit);
  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
	 the required frame size.  Since this is the very start of the
	 function, the scratch register can be any caller-saved
	 register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
	return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
	{
	  /* We don't use ix86_gen_add3 in this case because it will
	     want to split to lea, but when not optimizing the insn
	     will not be split after this point.  */
	  emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						offset)));
	}
      else
	{
	  emit_move_insn (scratch_reg, offset);
	  emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
				    stack_pointer_rtx));
	}
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;
  /* Mark the jump as very likely to be taken.  */
  add_reg_note (jump_insn, REG_BR_PROB,
		GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));

  if (split_stack_fn == NULL_RTX)
    split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
  call_fusage = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
	 Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
	{
	  rtx rax;

	  rax = gen_rtx_REG (word_mode, AX_REG);
	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
	  use_reg (&call_fusage, rax);
	}

      if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
	{
	  HOST_WIDE_INT argval;

	  gcc_assert (Pmode == DImode);
	  /* When using the large model we need to load the address
	     into a register, and we've run out of registers.  So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large.  We pass the
	     argument size in the upper 32 bits of r10 and pass the
	     frame size in the lower 32 bits.  */
	  gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
	  gcc_assert ((args_size & 0xffffffff) == args_size);

	  if (split_stack_fn_large == NULL_RTX)
	    split_stack_fn_large =
	      gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");

	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx label, x;

	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      emit_insn (gen_set_rip_rex64 (reg10, label));
	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
	      emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
				  UNSPEC_GOT);
	      x = gen_rtx_CONST (Pmode, x);
	      emit_move_insn (reg11, x);
	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
	      x = gen_const_mem (Pmode, x);
	      emit_move_insn (reg11, x);
	    }
	  else
	    emit_move_insn (reg11, split_stack_fn_large);

	  fn = reg11;

	  argval = ((args_size << 16) << 16) + allocate;
	  emit_move_insn (reg10, GEN_INT (argval));
	}
      else
	{
	  emit_move_insn (reg10, allocate_rtx);
	  emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (&call_fusage, reg11);
	}

      use_reg (&call_fusage, reg10);
    }
  else
    {
      emit_insn (gen_push (GEN_INT (args_size)));
      emit_insn (gen_push (allocate_rtx));
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
				GEN_INT (UNITS_PER_WORD), constm1_rtx,
				NULL_RTX, false);
  add_function_usage_to (call_insn, call_fusage);
  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling _morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
		    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
	 fp -> old fp value
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp value
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, frame_reg,
					    GEN_INT (words * UNITS_PER_WORD))));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (UNITS_PER_WORD))));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
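/* A sketch of what this typically expands to on x86_64 with the small code
   model (illustrative only; the exact TCB offset of the stack boundary is
   defined by libgcc -- see libgcc/config/i386/morestack.S and
   generic-morestack.c):

	leaq	-N(%rsp), %r11		# sp minus frame size, when N is large
	cmpq	%fs:OFFSET, %r11	# compare against the stack boundary
	jae	.Lenough		# likely: plenty of stack left
	movq	$N, %r10		# frame size argument for __morestack
	movq	$A, %r11		# incoming argument size
	callq	__morestack
	retq				# return-prediction fix-up (unspec)
   .Lenough:
*/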
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}
/* Determine if OP is a suitable SUBREG RTX for an address.  */

static bool
ix86_address_subreg_operand (rtx op)
{
  enum machine_mode mode;

  if (!REG_P (op))
    return false;

  mode = GET_MODE (op);

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return false;

  /* Don't allow SUBREGs that span more than a word.  It can lead to spill
     failures when the register is one word out of a two word structure.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    return false;

  /* Allow only SUBREGs of non-eliminable hard registers.  */
  return register_no_elim_operand (op, mode);
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing the length of a lea
   instruction.  */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
	  && GET_MODE (XEXP (addr, 0)) == SImode)
	addr = XEXP (addr, 0);
      else if (GET_CODE (addr) == AND
	       && const_32bit_mask (XEXP (addr, 1), DImode))
	{
	  addr = XEXP (addr, 0);

	  /* Adjust SUBREGs.  */
	  if (GET_CODE (addr) == SUBREG
	      && GET_MODE (SUBREG_REG (addr)) == SImode)
	    addr = SUBREG_REG (addr);
	  else if (GET_MODE (addr) == DImode)
	    addr = gen_rtx_SUBREG (SImode, addr, 0);
	  else
	    return 0;
	}
    }

  if (REG_P (addr))
    base = addr;
  else if (GET_CODE (addr) == SUBREG)
    {
      if (ix86_address_subreg_operand (SUBREG_REG (addr)))
	base = addr;
      else
	return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case ASHIFT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      tmp = XEXP (op, 1);
	      if (!CONST_INT_P (tmp))
		return 0;
	      scale = INTVAL (tmp);
	      if ((unsigned HOST_WIDE_INT) scale > 3)
		return 0;
	      scale = 1 << scale;
	      break;

	    case ZERO_EXTEND:
	      op = XEXP (op, 0);
	      if (GET_CODE (op) != UNSPEC)
		return 0;
	      /* FALLTHRU */

	    case UNSPEC:
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case SUBREG:
	      if (!ix86_address_subreg_operand (SUBREG_REG (op)))
		return 0;
	      /* FALLTHRU */

	    case REG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  if (index)
    {
      if (REG_P (index))
	;
      else if (GET_CODE (index) == SUBREG
	       && ix86_address_subreg_operand (SUBREG_REG (index)))
	;
      else
	return 0;
    }

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != SEG_DEFAULT
      && ((base && GET_MODE (base) != word_mode)
	  || (index && GET_MODE (index) != word_mode)))
    return 0;

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
	return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
	  || index_reg == frame_pointer_rtx
	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp
      && base_reg
      && (base_reg == hard_frame_pointer_rtx
	  || base_reg == frame_pointer_rtx
	  || base_reg == arg_pointer_rtx
	  || (REG_P (base_reg)
	      && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
		  || REGNO (base_reg) == R13_REG))))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
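/* Example of the decomposition (illustrative only): for the address

	(plus:SI (plus:SI (mult:SI (reg:SI %esi) (const_int 4))
			  (reg:SI %ebx))
		 (const_int 16))

   i.e. 16(%ebx,%esi,4), the function fills OUT with base = %ebx,
   index = %esi, scale = 4, disp = 16, seg = SEG_DEFAULT and returns 1.  */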
/* Return cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse such addresses
     at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero
     in the memory address, but I don't have an AMD-K6 machine handy to
     check this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
	  && x != CONST0_RTX (TImode)
	  && !TARGET_64BIT)
	return false;
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x))
	return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
        {
        case LABEL_REF:
          return true;

        case CONST:
          if (GET_CODE (XEXP (disp, 0)) != PLUS)
            break;
          op0 = XEXP (XEXP (disp, 0), 0);
          op1 = XEXP (XEXP (disp, 0), 1);
          if (!CONST_INT_P (op1)
              || INTVAL (op1) >= 16*1024*1024
              || INTVAL (op1) < -16*1024*1024)
            break;
          if (GET_CODE (op0) == LABEL_REF)
            return true;
          if (GET_CODE (op0) == CONST
              && GET_CODE (XEXP (op0, 0)) == UNSPEC
              && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) == UNSPEC
              && XINT (op0, 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) != SYMBOL_REF)
            break;
          /* FALLTHRU */

        case SYMBOL_REF:
          /* TLS references should always be enclosed in UNSPEC.  */
          if (SYMBOL_REF_TLS_MODEL (op0))
            return false;
          if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
              && ix86_cmodel != CM_LARGE_PIC)
            return true;
          break;

        default:
          break;
        }
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions.  This limits the allowed
         distance of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || (XINT (disp, 1) != UNSPEC_GOTPCREL
              && XINT (disp, 1) != UNSPEC_GOTOFF
              && XINT (disp, 1) != UNSPEC_PCREL
              && XINT (disp, 1) != UNSPEC_PLTOFF))
        return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
        return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
        return false;
      /* We need to check for both symbols and labels because VxWorks loads
         text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
         details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
              || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
         While the ABI also specifies a 32bit relocation, we don't produce
         it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
           || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
          && !TARGET_64BIT)
        return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
        return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
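
/* Illustrative note, not part of the original source: typical displacement
   shapes accepted above are
     (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))
     (const (plus (unspec [(symbol_ref "x")] UNSPEC_GOTOFF) (const_int 4)))
     (const (unspec [(symbol_ref "tls_x")] UNSPEC_GOTNTPOFF))
   where the TLS forms additionally require the matching TLS model on the
   SYMBOL_REF.  */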
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                           rtx addr, bool strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;

  /* Since a constant address in x32 is sign extended to 64bit,
     we have to prevent addresses from 0x80000000 to 0xffffffff.  */
  if (TARGET_X32
      && CONST_INT_P (addr)
      && INTVAL (addr) < 0)
    return false;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.  */
  if (base)
    {
      rtx reg;

      if (REG_P (base))
        reg = base;
      else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
        reg = SUBREG_REG (base);
      else
        /* Base is not a register.  */
        return false;

      if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
        return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
        /* Base is not valid.  */
        return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg;

      if (REG_P (index))
        reg = index;
      else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
        reg = SUBREG_REG (index);
      else
        /* Index is not a register.  */
        return false;

      if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
        return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
        /* Index is not valid.  */
        return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
        /* Scale without index.  */
        return false;

      if (scale != 2 && scale != 4 && scale != 8)
        /* Scale is not a valid multiplier.  */
        return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == UNSPEC
          && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
        switch (XINT (XEXP (disp, 0), 1))
          {
          /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit
             when used.  While the ABI also specifies 32bit relocations, we
             don't produce them at all and use IP relative instead.  */
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
            gcc_assert (flag_pic);
            if (!TARGET_64BIT)
              goto is_legitimate_pic;

            /* 64bit address unspec.  */
            return false;

          case UNSPEC_GOTPCREL:
          case UNSPEC_PCREL:
            gcc_assert (flag_pic);
            goto is_legitimate_pic;

          case UNSPEC_GOTTPOFF:
          case UNSPEC_GOTNTPOFF:
          case UNSPEC_INDNTPOFF:
          case UNSPEC_NTPOFF:
          case UNSPEC_DTPOFF:
            break;

          case UNSPEC_STACK_CHECK:
            gcc_assert (flag_split_stack);
            break;

          default:
            /* Invalid address unspec.  */
            return false;
          }

      else if (SYMBOLIC_CONST (disp)
               && (flag_pic
                   || (TARGET_MACHO
#if TARGET_MACHO
                       && MACHOPIC_INDIRECT
                       && !machopic_operand_p (disp)
#endif
               )))
        {

        is_legitimate_pic:
          if (TARGET_64BIT && (index || base))
            {
              /* foo@dtpoff(%rX) is ok.  */
              if (GET_CODE (disp) != CONST
                  || GET_CODE (XEXP (disp, 0)) != PLUS
                  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
                  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
                  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
                /* Non-constant pic memory reference.  */
                return false;
            }
          else if ((!TARGET_MACHO || flag_pic)
                   && ! legitimate_pic_address_disp_p (disp))
            /* Displacement is an invalid pic construct.  */
            return false;
#if TARGET_MACHO
          else if (MACHO_DYNAMIC_NO_PIC_P
                   && !ix86_legitimate_constant_p (Pmode, disp))
            /* displacement must be referenced via non_lazy_pointer */
            return false;
#endif

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is a good idea, unfortunately these constructs may
             be created by the "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo (int i)
               {
                 return *(&a + i);
               }

             This code is nonsensical, but results in addressing the
             GOT table with a pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by the
             "addsi3" pattern, which later gets split to lea in case
             the output register differs from the input.  While this
             could be handled by a separate addsi pattern for this case
             that never results in lea, disabling this test seems to be
             the easier and correct fix for the crash.  */
        }
      else if (GET_CODE (disp) != LABEL_REF
               && !CONST_INT_P (disp)
               && (GET_CODE (disp) != CONST
                   || !ix86_legitimate_constant_p (Pmode, disp))
               && (GET_CODE (disp) != SYMBOL_REF
                   || !ix86_legitimate_constant_p (Pmode, disp)))
        /* Displacement is not constant.  */
        return false;
      else if (TARGET_64BIT
               && !x86_64_immediate_operand (disp, VOIDmode))
        /* Displacement is out of range.  */
        return false;
    }

  /* Everything looks valid.  */
  return true;
}
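
/* Illustrative sketch, not part of the original source: build a canonical
   base + index*scale + disp address and ask ix86_legitimate_address_p
   about it.  The constructors are real GCC RTL builders; the helper and
   its fixed scale/offset are hypothetical.  */
static bool ATTRIBUTE_UNUSED
example_check_sib_address (rtx base_reg, rtx index_reg)
{
  /* (plus (plus base (mult index 4)) 16), i.e. 16(base,index,4).  */
  rtx addr = gen_rtx_PLUS (Pmode, base_reg,
                           gen_rtx_MULT (Pmode, index_reg, GEN_INT (4)));
  addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (16));
  return ix86_legitimate_address_p (word_mode, addr, false);
}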
/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}
/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;
  rtx base;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
        reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if (TARGET_64BIT
           && ix86_cmodel != CM_SMALL_PIC
           && gotoff_operand (addr, Pmode))
    {
      rtx tmpreg;
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                    UNSPEC_GOTOFF);
          new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
        }
      else
        new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      if (!reg)
        tmpreg = gen_reg_rtx (Pmode);
      else
        tmpreg = reg;
      emit_move_insn (tmpreg, new_rtx);

      if (reg != 0)
        {
          new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
                                         tmpreg, 1, OPTAB_DIRECT);
          new_rtx = reg;
        }
      else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                    UNSPEC_GOTOFF);
          new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
        }
      else
        new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

      if (reg != 0)
        {
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
           /* We can't use @GOTOFF for text labels on VxWorks;
              see gotoff_operand.  */
           || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
        {
          if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
            return legitimize_dllimport_symbol (addr, true);
          if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
              && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
            {
              rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
              return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
            }
        }

      /* For x64 PE-COFF there is no GOT table.  So we use the address
         directly.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
          new_rtx = gen_const_mem (Pmode, new_rtx);
          set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          /* Use gen_movsi directly; otherwise the address is loaded
             into a register for CSE.  We don't want to CSE these
             addresses; instead we CSE addresses from the GOT table,
             so skip this.  */
          emit_insn (gen_movsi (reg, new_rtx));
          new_rtx = reg;
        }
      else
        {
          /* This symbol must be referenced via a load from the
             Global Offset Table (@GOT).  */

          if (reload_in_progress)
            df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
          if (TARGET_64BIT)
            new_rtx = force_reg (Pmode, new_rtx);
          new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
          new_rtx = gen_const_mem (Pmode, new_rtx);
          set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
    }
  else
    {
      if (CONST_INT_P (addr)
          && !x86_64_immediate_operand (addr, VOIDmode))
        {
          if (reg)
            {
              emit_move_insn (reg, addr);
              new_rtx = reg;
            }
          else
            new_rtx = force_reg (Pmode, addr);
        }
      else if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);

          /* We must match stuff we generate before.  Assume the only
             unspecs that can get here are ours.  Not that we could do
             anything with them anyway....  */
          if (GET_CODE (addr) == UNSPEC
              || (GET_CODE (addr) == PLUS
                  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
            return orig;
          gcc_assert (GET_CODE (addr) == PLUS);
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if (gotoff_operand (op0, Pmode)
              && CONST_INT_P (op1))
            {
              if (!TARGET_64BIT)
                {
                  if (reload_in_progress)
                    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
                  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
                                            UNSPEC_GOTOFF);
                  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
                  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
                  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

                  if (reg != 0)
                    {
                      emit_move_insn (reg, new_rtx);
                      new_rtx = reg;
                    }
                }
              else
                {
                  if (INTVAL (op1) < -16*1024*1024
                      || INTVAL (op1) >= 16*1024*1024)
                    {
                      if (!x86_64_immediate_operand (op1, Pmode))
                        op1 = force_reg (Pmode, op1);
                      new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
                    }
                }
            }
          else
            {
              base = legitimize_pic_address (XEXP (addr, 0), reg);
              new_rtx = legitimize_pic_address (XEXP (addr, 1),
                                                base == reg ? NULL_RTX : reg);

              if (CONST_INT_P (new_rtx))
                new_rtx = plus_constant (base, INTVAL (new_rtx));
              else
                {
                  if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
                    {
                      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
                      new_rtx = XEXP (new_rtx, 1);
                    }
                  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
                }
            }
        }
    }
  return new_rtx;
}
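
/* Illustrative note, not part of the original source: in 32-bit PIC code a
   global symbol "foo" is rewritten by legitimize_pic_address into
   (mem (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOT))))
   and emitted roughly as "movl foo@GOT(%ebx), %reg"; a local symbol
   instead becomes pic_offset_table_rtx plus an @GOTOFF unspec constant.  */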
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != tp_mode)
    {
      gcc_assert (GET_MODE (tp) == SImode);
      gcc_assert (tp_mode == DImode);

      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
    }

  if (to_reg)
    tp = copy_to_mode_reg (tp_mode, tp);

  return tp;
}
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
        = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
           ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  return ix86_tls_symbol;
}

/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

static rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
        = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
        |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  enum machine_mode tp_mode = Pmode;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
        {
          if (flag_pic)
            pic = pic_offset_table_rtx;
          else
            {
              pic = gen_reg_rtx (Pmode);
              emit_insn (gen_set_got (pic));
            }
        }

      if (TARGET_GNU2_TLS)
        {
          if (TARGET_64BIT)
            emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
          else
            emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

          tp = get_thread_pointer (Pmode, true);
          dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

          set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
        }
      else
        {
          rtx caddr = ix86_tls_get_addr ();

          if (TARGET_64BIT)
            {
              rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;

              start_sequence ();
              emit_call_insn (ix86_gen_tls_global_dynamic_64 (rax, x,
                                                              caddr));
              insns = get_insns ();
              end_sequence ();

              RTL_CONST_CALL_P (insns) = 1;
              emit_libcall_block (insns, dest, rax, x);
            }
          else
            emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
        }
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
        {
          if (flag_pic)
            pic = pic_offset_table_rtx;
          else
            {
              pic = gen_reg_rtx (Pmode);
              emit_insn (gen_set_got (pic));
            }
        }

      if (TARGET_GNU2_TLS)
        {
          rtx tmp = ix86_tls_module_base ();

          if (TARGET_64BIT)
            emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
          else
            emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

          tp = get_thread_pointer (Pmode, true);
          set_unique_reg_note (get_last_insn (), REG_EQUAL,
                               gen_rtx_MINUS (Pmode, tmp, tp));
        }
      else
        {
          rtx caddr = ix86_tls_get_addr ();

          if (TARGET_64BIT)
            {
              rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;

              start_sequence ();
              emit_call_insn (ix86_gen_tls_local_dynamic_base_64 (rax,
                                                                  caddr));
              insns = get_insns ();
              end_sequence ();

              /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
                 share the LD_BASE result with other LD model accesses.  */
              eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                    UNSPEC_TLS_LD_BASE);

              RTL_CONST_CALL_P (insns) = 1;
              emit_libcall_block (insns, base, rax, eqv);
            }
          else
            emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
        }

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
        {
          dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

          set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
        }
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
        {
          if (TARGET_SUN_TLS)
            {
              /* The Sun linker took the AMD64 TLS spec literally
                 and can only handle %rax as destination of the
                 initial executable code sequence.  */

              dest = gen_reg_rtx (Pmode);
              emit_insn (gen_tls_initial_exec_64_sun (dest, x));
              return dest;
            }

          /* Generate DImode references to avoid %fs:(%reg32)
             problems and linker IE->LE relaxation bug.  */
          tp_mode = DImode;
          pic = NULL;
          type = UNSPEC_GOTNTPOFF;
        }
      else if (flag_pic)
        {
          if (reload_in_progress)
            df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
          pic = pic_offset_table_rtx;
          type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
        }
      else if (!TARGET_ANY_GNU_TLS)
        {
          pic = gen_reg_rtx (Pmode);
          emit_insn (gen_set_got (pic));
          type = UNSPEC_GOTTPOFF;
        }
      else
        {
          pic = NULL;
          type = UNSPEC_INDNTPOFF;
        }

      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
        off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (tp_mode,
                                     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          off = force_reg (tp_mode, off);
          return gen_rtx_PLUS (tp_mode, base, off);
        }
      else
        {
          base = get_thread_pointer (Pmode, true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (ix86_gen_sub3 (dest, base, off));
        }
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
                            (TARGET_64BIT || TARGET_ANY_GNU_TLS)
                            ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (Pmode,
                                     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (Pmode, true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (ix86_gen_sub3 (dest, base, off));
        }
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
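
/* Illustrative note, not part of the original source: on ELF/x86-64 the
   four models above correspond roughly to these sequences:
     global-dynamic: leaq x@tlsgd(%rip),%rdi; call __tls_get_addr
     local-dynamic:  leaq x@tlsld(%rip),%rdi; call __tls_get_addr;
                     leaq x@dtpoff(%rax),%reg
     initial-exec:   movq x@gottpoff(%rip),%reg; movq %fs:(%reg),...
     local-exec:     movq %fs:0,%reg; ... x@tpoff(%reg)
   (the real global-dynamic sequence carries extra padding prefixes so the
   linker can relax it).  */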
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL.  */

static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
  htab_t dllimport_map;

static tree
get_dllimport_decl (tree decl)
{
  struct tree_map *h, in;
  void **loc;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
  h = (struct tree_map *) *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc_tree_map ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
                           VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
    ? "*__imp_" : "*__imp__";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
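
/* Illustrative note, not part of the original source: on a target with a
   non-empty user_label_prefix (e.g. 32-bit mingw, where C symbol foo
   assembles as _foo) the code above names the import cell "*__imp__foo";
   with an empty prefix, or for a fastcall symbol, it uses "*__imp_foo".
   References then load through that cell, e.g. "movl __imp__foo, %eax".  */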
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
                                      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
        return legitimize_dllimport_symbol (x, true);
      if (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
          && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
        {
          rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
          return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
        }
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
                        GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
        {
          changed = 1;
          log = INTVAL (XEXP (XEXP (x, 0), 1));
          XEXP (x, 0) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
                                      GEN_INT (1 << log));
        }

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
          && CONST_INT_P (XEXP (XEXP (x, 1), 1))
          && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
        {
          changed = 1;
          log = INTVAL (XEXP (XEXP (x, 1), 1));
          XEXP (x, 1) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
                                      GEN_INT (1 << log));
        }

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          rtx tmp = XEXP (x, 0);
          XEXP (x, 0) = XEXP (x, 1);
          XEXP (x, 1) = tmp;
          changed = 1;
        }

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
         created by virtual register instantiation, register elimination, and
         similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
        {
          changed = 1;
          x = gen_rtx_PLUS (Pmode,
                            gen_rtx_PLUS (Pmode, XEXP (x, 0),
                                          XEXP (XEXP (x, 1), 0)),
                            XEXP (XEXP (x, 1), 1));
        }

      /* Canonicalize
         (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
               && CONSTANT_P (XEXP (x, 1)))
        {
          rtx constant;
          rtx other = NULL_RTX;

          if (CONST_INT_P (XEXP (x, 1)))
            {
              constant = XEXP (x, 1);
              other = XEXP (XEXP (XEXP (x, 0), 1), 1);
            }
          else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
            {
              constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
              other = XEXP (x, 1);
            }
          else
            constant = 0;

          if (constant)
            {
              changed = 1;
              x = gen_rtx_PLUS (Pmode,
                                gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
                                              XEXP (XEXP (XEXP (x, 0), 1), 0)),
                                plus_constant (other, INTVAL (constant)));
            }
        }

      if (changed && ix86_legitimate_address_p (mode, x, false))
        return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
        {
          changed = 1;
          XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
        }

      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          changed = 1;
          XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
        }

      if (changed
          && REG_P (XEXP (x, 1))
          && REG_P (XEXP (x, 0)))
        return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
        {
          changed = 1;
          x = legitimize_pic_address (x, 0);
        }

      if (changed && ix86_legitimate_address_p (mode, x, false))
        return x;

      if (REG_P (XEXP (x, 0)))
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val  = force_operand (XEXP (x, 1), temp);
          if (val != temp)
            {
              if (GET_MODE (val) != Pmode)
                val = convert_to_mode (Pmode, val, 1);
              emit_move_insn (temp, val);
            }

          XEXP (x, 1) = temp;
          return x;
        }

      else if (REG_P (XEXP (x, 1)))
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val  = force_operand (XEXP (x, 0), temp);
          if (val != temp)
            {
              if (GET_MODE (val) != Pmode)
                val = convert_to_mode (Pmode, val, 1);
              emit_move_insn (temp, val);
            }

          XEXP (x, 0) = temp;
          return x;
        }
    }

  return x;
}
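
/* Illustrative sketch, not part of the original source: the shift-to-
   multiply canonicalization above turns (plus (ashift reg (const_int 2))
   reg2) into (plus (mult reg (const_int 4)) reg2), which
   ix86_decompose_address can then treat as index*scale + base.  The
   helper below is hypothetical and just restates that rewrite.  */
static rtx ATTRIBUTE_UNUSED
example_shift_to_mult (rtx reg, int log)
{
  /* Only shift counts 0..3 map onto the SIB scales 1, 2, 4, 8.  */
  gcc_assert (log >= 0 && log < 4);
  return gen_rtx_MULT (Pmode, reg, GEN_INT (1 << log));
}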
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
        output_addr_const (file, x);
      else
        {
          const char *name = XSTR (x, 0);

          /* Mark the decl as referenced so that cgraph will
             output the function.  */
          if (SYMBOL_REF_DECL (x))
            mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
          if (MACHOPIC_INDIRECT
              && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
            name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
          assemble_name (file, name);
        }
      if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
          && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
        fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
         but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        {
          /* We can use %d if the number is <32 bits and positive.  */
          if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
            fprintf (file, "0x%lx%08lx",
                     (unsigned long) CONST_DOUBLE_HIGH (x),
                     (unsigned long) CONST_DOUBLE_LOW (x));
          else
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
        }
      else
        /* We can't handle floating point constants;
           TARGET_PRINT_OPERAND must handle them.  */
        output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
        {
          output_pic_addr_const (file, XEXP (x, 0), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 1), code);
        }
      else
        {
          gcc_assert (CONST_INT_P (XEXP (x, 1)));
          output_pic_addr_const (file, XEXP (x, 1), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 0), code);
        }
      break;

    case MINUS:
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_STACK_CHECK)
        {
          bool f = i386_asm_output_addr_const_extra (file, x);
          gcc_assert (f);
          break;
        }

      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
        {
        case UNSPEC_GOT:
          fputs ("@GOT", file);
          break;
        case UNSPEC_GOTOFF:
          fputs ("@GOTOFF", file);
          break;
        case UNSPEC_PLTOFF:
          fputs ("@PLTOFF", file);
          break;
        case UNSPEC_PCREL:
          fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                 "(%rip)" : "[rip]", file);
          break;
        case UNSPEC_GOTPCREL:
          fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
          break;
        case UNSPEC_GOTTPOFF:
          /* FIXME: This might be @TPOFF in Sun ld too.  */
          fputs ("@gottpoff", file);
          break;
        case UNSPEC_TPOFF:
          fputs ("@tpoff", file);
          break;
        case UNSPEC_NTPOFF:
          if (TARGET_64BIT)
            fputs ("@tpoff", file);
          else
            fputs ("@ntpoff", file);
          break;
        case UNSPEC_DTPOFF:
          fputs ("@dtpoff", file);
          break;
        case UNSPEC_GOTNTPOFF:
          if (TARGET_64BIT)
            fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                   "@gottpoff(%rip)": "@gottpoff[rip]", file);
          else
            fputs ("@gotntpoff", file);
          break;
        case UNSPEC_INDNTPOFF:
          fputs ("@indntpoff", file);
          break;
#if TARGET_MACHO
        case UNSPEC_MACHOPIC_OFFSET:
          putc ('-', file);
          machopic_output_function_base_name (file);
          break;
#endif
        default:
          output_operand_lossage ("invalid UNSPEC as operand");
          break;
        }
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
   }
}
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
            && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else
    return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
        idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_MODE (XEXP (x, 0)) == Pmode
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
        {
          rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
          x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
          if (MEM_P (orig_x))
            x = replace_equiv_address_nv (orig_x, x);
          return x;
        }
      if (GET_CODE (x) != CONST
          || GET_CODE (XEXP (x, 0)) != UNSPEC
          || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
              && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
          || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
        return ix86_delegitimize_tls_address (orig_x);
      x = XVECEXP (XEXP (x, 0), 0, 0);
      if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
        {
          x = simplify_gen_subreg (GET_MODE (orig_x), x,
                                   GET_MODE (x), 0);
          if (x == NULL_RTX)
            return orig_x;
        }
      return x;
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
        reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
        reg_addend = XEXP (reg_addend, 0);
      else
        {
          reg_addend = NULL_RTX;
          addend = XEXP (x, 0);
        }
    }
  else
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
          || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
    result = XVECEXP (x, 0, 0);

  if (TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  if (const_addend)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
         addend and subtract pic_offset_table_rtx.  This can happen e.g.
         for code like:
         leal (%ebx, %ecx, 4), %ecx
         ...
         movl foo@GOTOFF(%ecx), %edx
         in which case we return (%ecx - %ebx) + foo.  */
      if (pic_offset_table_rtx)
        result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
                                                     pic_offset_table_rtx),
                               result);
      else
        return orig_x;
    }
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
      if (result == NULL_RTX)
        return orig_x;
    }
  return result;
}
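
/* Illustrative note, not part of the original source: the 32-bit PIC
   address (plus (reg:SI %ebx) (const (unspec [(symbol_ref "foo")]
   UNSPEC_GOTOFF))) delegitimizes back to (symbol_ref "foo"), which keeps
   DWARF output free of the per-function PIC label.  */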
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
        return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
          && (CONST_INT_P (XEXP (term, 1))
              || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
        term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
          || (XINT (term, 1) != UNSPEC_GOTPCREL
              && XINT (term, 1) != UNSPEC_PCREL))
        return x;

      return XVECEXP (term, 0, 0);
    }

  return ix86_delegitimize_address (x);
}
static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
                    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      switch (mode)
        {
        case CCAmode:
          suffix = "a";
          break;
        case CCCmode:
          suffix = "c";
          break;
        case CCOmode:
          suffix = "o";
          break;
        case CCSmode:
          suffix = "s";
          break;
        default:
          suffix = "e";
        }
      break;
    case NE:
      switch (mode)
        {
        case CCAmode:
          suffix = "na";
          break;
        case CCCmode:
          suffix = "nc";
          break;
        case CCOmode:
          suffix = "no";
          break;
        case CCSmode:
          suffix = "ns";
          break;
        default:
          suffix = "ne";
        }
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
         Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
        suffix = fp ? "nbe" : "a";
      else if (mode == CCCmode)
        suffix = "b";
      else
        gcc_unreachable ();
      break;
    case LT:
      switch (mode)
        {
        case CCNOmode:
        case CCGOCmode:
          suffix = "s";
          break;
        case CCmode:
        case CCGCmode:
          suffix = "l";
          break;
        default:
          gcc_unreachable ();
        }
      break;
    case LTU:
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = "b";
      break;
    case GE:
      switch (mode)
        {
        case CCNOmode:
        case CCGOCmode:
          suffix = "ns";
          break;
        case CCmode:
        case CCGCmode:
          suffix = "ge";
          break;
        default:
          gcc_unreachable ();
        }
      break;
    case GEU:
      /* ??? As above.  */
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      /* ??? As above.  */
      if (mode == CCmode)
        suffix = "be";
      else if (mode == CCCmode)
        suffix = fp ? "nb" : "ae";
      else
        gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
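
/* Illustrative note, not part of the original source: put_condition_code
   composes the conditional-instruction mnemonic, e.g. EQ -> "e" (sete,
   je), GTU in CCmode -> "a" ("nbe" for fcmov), and LTU -> "b"; REVERSE
   flips the sense first, so a reversed EQ prints "ne".  */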
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
 */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg = NULL;
  bool duplicated = code == 'd' && TARGET_AVX;

  gcc_assert (x == pc_rtx
              || (REGNO (x) != ARG_POINTER_REGNUM
                  && REGNO (x) != FRAME_POINTER_REGNUM
                  && REGNO (x) != FLAGS_REG
                  && REGNO (x) != FPSR_REG
                  && REGNO (x) != FPCR_REG));

  if (ASSEMBLER_DIALECT == ASM_ATT)
    putc ('%', file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'x')
    code = 16;
  else if (code == 't')
    code = 32;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers: "r%d[bwd]"  */
  if (REX_INT_REG_P (x))
    {
      gcc_assert (TARGET_64BIT);
      putc ('r', file);
      fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
      switch (code)
        {
        case 0:
          error ("extended registers have no high halves");
          break;
        case 1:
          putc ('b', file);
          break;
        case 2:
          putc ('w', file);
          break;
        case 4:
          putc ('d', file);
          break;
        case 8:
          /* no suffix */
          break;
        default:
          error ("unsupported operand size for extended register");
          break;
        }
      return;
    }

  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
        {
          reg = "st(0)";
          break;
        }
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
        putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      reg = hi_reg_name[REGNO (x)];
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
        goto normal;
      reg = qi_reg_name[REGNO (x)];
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
        goto normal;
      reg = qi_high_reg_name[REGNO (x)];
      break;
    case 32:
      if (SSE_REG_P (x))
        {
          gcc_assert (!duplicated);
          putc ('y', file);
          fputs (hi_reg_name[REGNO (x)] + 1, file);
          return;
        }
      break;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
        fprintf (file, ", %%%s", reg);
      else
        fprintf (file, ", %s", reg);
    }
}
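
/* Illustrative note, not part of the original source: print_reg with
   REGNO 0 (the ax register) prints "%al" for code 'b', "%ax" for 'w',
   "%eax" for 'k' and "%rax" for 'q' in ATT syntax; the Intel dialect
   omits the '%' prefix.  */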
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn)
        && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  return NULL;
}
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
        otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   E -- print address with DImode register names if TARGET_64BIT.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
        delimiter.
   b -- print the QImode name of the register for the indicated operand.
        %b0 would print %al if operands[0] is reg 0.
   w --  likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   x -- likewise, print the V4SFmode name of the register.
   t -- likewise, print the V8SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   @ -- print a segment register of thread base pointer load
   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
 */

static void
ix86_print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
        {
        case '*':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('*', file);
          return;

        case '&':
          {
            const char *name = get_some_local_dynamic_name ();
            if (name == NULL)
              output_operand_lossage ("'%%&' used without any "
                                      "local dynamic TLS references");
            else
              assemble_name (file, name);
            return;
          }

        case 'A':
          switch (ASSEMBLER_DIALECT)
            {
            case ASM_ATT:
              putc ('*', file);
              break;

            case ASM_INTEL:
              /* Intel syntax.  For absolute addresses, registers should not
                 be surrounded by braces.  */
              if (!REG_P (x))
                {
                  putc ('[', file);
                  ix86_print_operand (file, x, 0);
                  putc (']', file);
                  return;
                }
              break;

            default:
              gcc_unreachable ();
            }

          ix86_print_operand (file, x, 0);
          return;

        case 'E':
          /* Wrap address in an UNSPEC to declare special handling.  */
          if (TARGET_64BIT)
            x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);

          output_address (x);
          return;

        case 'L':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'W':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('w', file);
          return;

        case 'B':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('b', file);
          return;

        case 'Q':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'S':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('s', file);
          return;

        case 'T':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('t', file);
          return;

        case 'z':
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            {
              /* Opcodes don't get size suffixes if using Intel opcodes.  */
              if (ASSEMBLER_DIALECT == ASM_INTEL)
                return;

              switch (GET_MODE_SIZE (GET_MODE (x)))
                {
                case 1:
                  putc ('b', file);
                  return;

                case 2:
                  putc ('w', file);
                  return;

                case 4:
                  putc ('l', file);
                  return;

                case 8:
                  putc ('q', file);
                  return;

                default:
                  output_operand_lossage
                    ("invalid operand size for operand code '%c'", code);
                  return;
                }
            }

          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
            warning
              (0, "non-integer operand used with operand code '%c'", code);
          /* FALLTHRU */

        case 'Z':
          /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
          if (ASSEMBLER_DIALECT == ASM_INTEL)
            return;

          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            {
              switch (GET_MODE_SIZE (GET_MODE (x)))
                {
                case 2:
#ifdef HAVE_AS_IX86_FILDS
                  putc ('s', file);
#endif
                  return;

                case 4:
                  putc ('l', file);
                  return;

                case 8:
#ifdef HAVE_AS_IX86_FILDQ
                  putc ('q', file);
#else
                  fputs ("ll", file);
#endif
                  return;

                default:
                  break;
                }
            }
          else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
            {
              /* 387 opcodes don't get size suffixes
                 if the operands are registers.  */
              if (STACK_REG_P (x))
                return;

              switch (GET_MODE_SIZE (GET_MODE (x)))
                {
                case 4:
                  putc ('s', file);
                  return;

                case 8:
                  putc ('l', file);
                  return;

                case 12:
                case 16:
                  putc ('t', file);
                  return;

                default:
                  break;
                }
            }
          else
            {
              output_operand_lossage
                ("invalid operand type used with operand code '%c'", code);
              return;
            }

          output_operand_lossage
            ("invalid operand size for operand code '%c'", code);
          return;

        case 'd':
        case 'b':
        case 'w':
        case 'k':
        case 'q':
        case 'h':
        case 't':
        case 'y':
        case 'x':
        case 'X':
        case 'P':
        case 'p':
          break;

        case 's':
          if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
            {
              ix86_print_operand (file, x, 0);
              fputs (", ", file);
            }
          return;

        case 'D':
          /* Little bit of braindamage here.  The SSE compare instructions
             use completely different names for the comparisons than the fp
             conditional moves do.  */
          if (TARGET_AVX)
            {
              switch (GET_CODE (x))
                {
                case EQ:
                  fputs ("eq", file);
                  break;
                case UNEQ:
                  fputs ("eq_us", file);
                  break;
                case LT:
                  fputs ("lt", file);
                  break;
                case UNLT:
                  fputs ("nge", file);
                  break;
                case LE:
                  fputs ("le", file);
                  break;
                case UNLE:
                  fputs ("ngt", file);
                  break;
                case UNORDERED:
                  fputs ("unord", file);
                  break;
                case NE:
                  fputs ("neq", file);
                  break;
                case LTGT:
                  fputs ("neq_oq", file);
                  break;
                case GE:
                  fputs ("ge", file);
                  break;
                case UNGE:
                  fputs ("nlt", file);
                  break;
                case GT:
                  fputs ("gt", file);
                  break;
                case UNGT:
                  fputs ("nle", file);
                  break;
                case ORDERED:
                  fputs ("ord", file);
                  break;
                default:
                  output_operand_lossage ("operand is not a condition code, "
                                          "invalid operand code 'D'");
                  return;
                }
            }
          else
            {
              switch (GET_CODE (x))
                {
                case EQ:
                case UNEQ:
                  fputs ("eq", file);
                  break;
                case LT:
                case UNLT:
                  fputs ("lt", file);
                  break;
                case LE:
                case UNLE:
                  fputs ("le", file);
                  break;
                case UNORDERED:
                  fputs ("unord", file);
                  break;
                case NE:
                case LTGT:
                  fputs ("neq", file);
                  break;
                case UNGE:
                case GE:
                  fputs ("nlt", file);
                  break;
                case UNGT:
                case GT:
                  fputs ("nle", file);
                  break;
                case ORDERED:
                  fputs ("ord", file);
                  break;
                default:
                  output_operand_lossage ("operand is not a condition code, "
                                          "invalid operand code 'D'");
                  return;
                }
            }
          return;

        case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            {
              switch (GET_MODE (x))
                {
                case HImode: putc ('w', file); break;
                case SImode:
                case SFmode: putc ('l', file); break;
                case DImode:
                case DFmode: putc ('q', file); break;
                default: gcc_unreachable ();
                }
              putc ('.', file);
            }
#endif
          return;

        case 'C':
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is neither a constant nor a "
                                      "condition code, invalid operand code "
                                      "'C'");
              return;
            }
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
          return;

        case 'F':
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is neither a constant nor a "
                                      "condition code, invalid operand code "
                                      "'F'");
              return;
            }
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('.', file);
#endif
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
          return;

          /* Like above, but reverse condition */
        case 'c':
          /* Check to see if the argument to %c is really a constant
             and not a condition code which needs to be reversed.  */
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is neither a constant nor a "
                                      "condition code, invalid operand "
                                      "code 'c'");
              return;
            }
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
          return;

        case 'f':
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is neither a constant nor a "
                                      "condition code, invalid operand "
                                      "code 'f'");
              return;
            }
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('.', file);
#endif
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
          return;

        case 'H':
          if (!offsettable_memref_p (x))
            {
              output_operand_lossage ("operand is not an offsettable memory "
                                      "reference, invalid operand "
                                      "code 'H'");
              return;
            }
          /* It doesn't actually matter what mode we use here, as we're
             only going to use this for printing.  */
          x = adjust_address_nv (x, DImode, 8);
          break;

        case '+':
          {
            rtx x;

            if (!optimize
                || optimize_function_for_size_p (cfun)
                || !TARGET_BRANCH_PREDICTION_HINTS)
              return;

            x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
            if (x)
              {
                int pred_val = INTVAL (XEXP (x, 0));

                if (pred_val < REG_BR_PROB_BASE * 45 / 100
                    || pred_val > REG_BR_PROB_BASE * 55 / 100)
                  {
                    bool taken = pred_val > REG_BR_PROB_BASE / 2;
                    bool cputaken
                      = final_forward_branch_p (current_output_insn) == 0;

                    /* Emit hints only in the case default branch prediction
                       heuristics would fail.  */
                    if (taken != cputaken)
                      {
                        /* We use 3e (DS) prefix for taken branches and
                           2e (CS) prefix for not taken branches.  */
                        if (taken)
                          fputs ("ds ; ", file);
                        else
                          fputs ("cs ; ", file);
                      }
                  }
              }
            return;
          }

        case 'Y':
          switch (GET_CODE (x))
            {
            case NE:
              fputs ("neq", file);
              break;
            case EQ:
              fputs ("eq", file);
              break;
            case GE:
            case GEU:
              fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
              break;
            case GT:
            case GTU:
              fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
              break;
            case LE:
            case LEU:
              fputs ("le", file);
              break;
            case LT:
            case LTU:
              fputs ("lt", file);
              break;
            case UNORDERED:
              fputs ("unord", file);
              break;
            case ORDERED:
              fputs ("ord", file);
              break;
            case UNEQ:
              fputs ("ueq", file);
              break;
            case UNGE:
              fputs ("nlt", file);
              break;
            case UNGT:
              fputs ("nle", file);
              break;
            case UNLE:
              fputs ("ule", file);
              break;
            case UNLT:
              fputs ("ult", file);
              break;
            case LTGT:
              fputs ("une", file);
              break;
            default:
              output_operand_lossage ("operand is not a condition code, "
                                      "invalid operand code 'Y'");
              return;
            }
          return;

        case ';':
#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
          putc (';', file);
#endif
          return;

        case '@':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('%', file);

          /* The kernel uses a different segment register for performance
             reasons; a system call would not have to trash the userspace
             segment register, which would be expensive.  */
          if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
            fputs ("fs", file);
          else
            fputs ("gs", file);
          return;

        case '~':
          putc (TARGET_AVX2 ? 'i' : 'f', file);
          return;

        case '^':
          if (TARGET_64BIT && Pmode != word_mode)
            fputs ("addr32 ", file);
          return;

        default:
          output_operand_lossage ("invalid operand code '%c'", code);
        }
    }

  if (REG_P (x))
    print_reg (x, code, file);

  else if (MEM_P (x))
    {
      /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
          && GET_MODE (x) != BLKmode)
        {
          const char *size;
          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 1: size = "BYTE"; break;
            case 2: size = "WORD"; break;
            case 4: size = "DWORD"; break;
            case 8: size = "QWORD"; break;
            case 12: size = "TBYTE"; break;
            case 16:
              if (GET_MODE (x) == XFmode)
                size = "TBYTE";
              else
                size = "XMMWORD";
              break;
            case 32: size = "YMMWORD"; break;
            default:
              gcc_unreachable ();
            }

          /* Check for explicit size override (codes 'b', 'w', 'k',
             'q' and 'x')  */
          if (code == 'b')
            size = "BYTE";
          else if (code == 'w')
            size = "WORD";
          else if (code == 'k')
            size = "DWORD";
          else if (code == 'q')
            size = "QWORD";
          else if (code == 'x')
            size = "XMMWORD";

          fputs (size, file);
          fputs (" PTR ", file);
        }

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
          && !CONST_INT_P (x))
        output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
        output_operand_lossage ("invalid constraints for operand");
      else
        output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('$', file);
      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
      if (code == 'q')
        fprintf (file, "0x%08llx", (unsigned long long) (int) l);
      else
        fprintf (file, "0x%08x", (unsigned int) l);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      long l[2];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_DOUBLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('$', file);
      fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fputs (dstr, file);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
         In 64-bit mode, we should probably support all 8-byte vectors,
         since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
        {
          gcc_assert (x == CONST0_RTX (GET_MODE (x)));
          x = const0_rtx;
        }

      if (code != 'P' && code != 'p')
        {
          if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
            }
          else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
                   || GET_CODE (x) == LABEL_REF)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
              else
                fputs ("OFFSET FLAT:", file);
            }
        }
      if (CONST_INT_P (x))
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic || MACHOPIC_INDIRECT)
        output_pic_addr_const (file, x, code);
      else
        output_addr_const (file, x);
    }
}

static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '*' || code == '+' || code == '&'
          || code == ';' || code == '~' || code == '^');
}
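
/* Illustrative note, not part of the original source: given a DImode
   register operand in %rax, the size-override codes above print
   "%b0" -> %al, "%w0" -> %ax, "%k0" -> %eax, "%q0" -> %rax and
   "%h0" -> %ah, which is how the machine description selects
   sub-registers in insn templates.  */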
/* Print a memory operand whose address is ADDR.  */

static void
ix86_print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  bool vsib = false;
  int code = 0;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      code = 'q';
    }
  else
    ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    {
      rtx tmp = SUBREG_REG (parts.base);
      parts.base = simplify_subreg (GET_MODE (parts.base),
				    tmp, GET_MODE (tmp), 0);
    }

  if (parts.index && GET_CODE (parts.index) == SUBREG)
    {
      rtx tmp = SUBREG_REG (parts.index);
      parts.index = simplify_subreg (GET_MODE (parts.index),
				     tmp, GET_MODE (tmp), 0);
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      gcc_unreachable ();
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index)
    {
      rtx symbol = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      if (GET_CODE (symbol) == LABEL_REF
	  || (GET_CODE (symbol) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
	base = pc_rtx;
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */
      if (CONST_INT_P (disp))
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names for zero-extended
	 addresses to force addr32 prefix.  */
      if (TARGET_64BIT
	  && (GET_CODE (addr) == ZERO_EXTEND
	      || GET_CODE (addr) == AND))
	{
	  gcc_assert (!code);
	  code = 'l';
	}

      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, code, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (CONST_INT_P (disp))
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, code, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
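/* As an example of the two dialects handled above: an address with
   base %ebx, index %ecx, scale 4 and displacement 8 is printed as
   "8(%ebx,%ecx,4)" under ASM_ATT and as "[ebx+ecx*4+8]" under
   ASM_INTEL.  */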
/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@tpoff", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@tpoff", file);
      else
	fputs ("@ntpoff", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
      else
	fputs ("@gotntpoff", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
      break;
#if TARGET_MACHO
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      putc ('-', file);
      machopic_output_function_base_name (file);
      break;
#endif

    case UNSPEC_STACK_CHECK:
      {
	int offset;

	gcc_assert (flag_split_stack);

#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
	offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
#else
	gcc_unreachable ();
#endif

	fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
      }
      break;

    default:
      return false;
    }

  return true;
}
/* Split one or more double-mode RTL references into pairs of half-mode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_double_mode (enum machine_mode mode, rtx operands[],
		   int num, rtx lo_half[], rtx hi_half[])
{
  enum machine_mode half_mode;
  unsigned int byte;

  switch (mode)
    {
    case TImode:
      half_mode = DImode;
      break;
    case DImode:
      half_mode = SImode;
      break;
    default:
      gcc_unreachable ();
    }

  byte = GET_MODE_SIZE (half_mode);

  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
      if (MEM_P (op))
	{
	  lo_half[num] = adjust_address (op, half_mode, 0);
	  hi_half[num] = adjust_address (op, half_mode, byte);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), byte);
	}
    }
}
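/* For instance, splitting the DImode constant 0x100000002 on a 32-bit
   target yields lo_half = (const_int 2) and hi_half = (const_int 1),
   while a DImode MEM is split into two SImode MEMs at offsets 0 and 4
   (x86 is little endian, so the low half lives at the lower address).  */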
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  static char buf[40];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    gcc_assert (is_sse);
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "vadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "vsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "vmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "vdiv";
      break;

    default:
      gcc_unreachable ();
    }

  if (is_sse)
    {
      if (TARGET_AVX)
	{
	  strcpy (buf, ssep);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
	  else
	    strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
	}
      else
	{
	  strcpy (buf, ssep + 1);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %0|%0, %2}");
	  else
	    strcat (buf, "sd\t{%2, %0|%0, %2}");
	}
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (MEM_P (operands[1]))
	{
	  p = "r%Z1\t%1";
	  break;
	}

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	  break;
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	  break;
	}

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
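/* A note on the templates above: in GCC output templates the construct
   "{att|intel}" emits the text before the '|' for the AT&T dialect and
   the text after it for the Intel dialect, so e.g. "p\t{%2, %0|%0, %2}"
   swaps the operand order between the two assemblers.  */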
/* Return needed mode for entity in optimize_mode_switching pass.  */

int
ix86_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */

  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
	return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
	return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
	return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
	return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

static void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_function_for_size_p (cfun))
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
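/* Bits 10-11 of the x87 control word select the rounding mode:
   00 = round to nearest, 01 = round down (toward -inf), 10 = round up
   (toward +inf), 11 = round toward zero (truncate).  That is why the
   code above clears with ~0x0c00 and then ORs in 0x0400, 0x0800 or
   0x0c00; bit 5 (0x0020) masks the precision exception.  */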
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
      output_asm_insn ("fisttp%Z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%Z0\t%0", operands);
      else
	output_asm_insn ("fist%Z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (FP_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "%vucomiss\t{%1, %0|%0, %1}";
	else
	  return "%vcomiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "%vucomisd\t{%1, %0|%0, %1}";
	else
	  return "%vcomisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return output_387_ffreep (operands, 1);
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */

      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return output_387_ffreep (operands, 0);
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
	"fcom%Z2\t%y2\n\tfnstsw\t%0",
	"fcomp%Z2\t%y2\n\tfnstsw\t%0",
	"fucom%Z2\t%y2\n\tfnstsw\t%0",
	"fucomp%Z2\t%y2\n\tfnstsw\t%0",

	"ficom%Z2\t%y2\n\tfnstsw\t%0",
	"ficomp%Z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}
void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
		 GOT_SYMBOL_NAME, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
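/* On most targets this emits "xorl %eax, %eax" (2 bytes, and it breaks
   dependency chains) rather than "movl $0, %eax" (5 bytes); the SImode
   widening above means that clearing %al or %ax clears the whole
   32-bit register, which is always safe after reload.  */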
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  if (op1 == op0)
	    return;
	  if (GET_MODE (op1) != mode)
	    op1 = convert_to_mode (mode, op1, 1);
	}
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (op1))
	op1 = legitimize_dllimport_symbol (op1, false);
    }
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);
      rtx tmp = NULL;

      model = SYMBOL_REF_TLS_MODEL (symbol);
      if (model)
	tmp = legitimize_tls_address (symbol, model, true);
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (symbol))
	tmp = legitimize_dllimport_symbol (symbol, true);

      if (tmp)
	{
	  tmp = force_operand (tmp, NULL);
	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (tmp == op0)
	    return;
	  if (GET_MODE (tmp) != mode)
	    op1 = convert_to_mode (mode, tmp, 1);
	}
    }

  if ((flag_pic || MACHOPIC_INDIRECT)
      && symbolic_operand (op1, mode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
	{
#if TARGET_MACHO
	  /* dynamic-no-pic */
	  if (MACHOPIC_INDIRECT)
	    {
	      rtx temp = ((reload_in_progress
			   || ((op0 && REG_P (op0))
			       && mode == Pmode))
			  ? op0 : gen_reg_rtx (Pmode));
	      op1 = machopic_indirect_data_reference (op1, temp);
	      if (MACHOPIC_PURE)
		op1 = machopic_legitimize_pic_address (op1, mode,
						       temp == op1 ? 0 : temp);
	    }
	  if (op0 != op1 && GET_CODE (op0) != MEM)
	    {
	      rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
	      emit_insn (insn);
	      return;
	    }
	  if (GET_CODE (op0) == MEM)
	    op1 = force_reg (Pmode, op1);
	  else
	    {
	      rtx temp = op0;
	      if (GET_CODE (temp) != REG)
		temp = gen_reg_rtx (Pmode);
	      temp = legitimize_pic_address (op1, temp);
	      if (temp == op0)
		return;
	      op1 = temp;
	    }
	  /* dynamic-no-pic */
#endif
	}
      else
	{
	  if (MEM_P (op0))
	    op1 = force_reg (mode, op1);
	  else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
	    {
	      rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
	      op1 = legitimize_pic_address (op1, reg);
	      if (op0 == op1)
		return;
	      if (GET_MODE (op1) != mode)
		op1 = convert_to_mode (mode, op1, 1);
	    }
	}
    }
  else
    {
      if (MEM_P (op0)
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && MEM_P (op1))
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (can_create_pseudo_p ()
	  && (mode == DImode) && TARGET_64BIT
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize)
	op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
	  && FLOAT_MODE_P (mode)
	  && GET_CODE (op1) == CONST_DOUBLE)
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  op1 = validize_mem (force_const_mem (mode, op1));
	  if (!register_operand (op0, mode))
	    {
	      rtx temp = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
	      emit_move_insn (op0, temp);
	      return;
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
      && !standard_sse_constant_p (op1))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* We need to check memory alignment for SSE mode since attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
	op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
	  && !register_operand (op1, mode))
	op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
/* Split 32-byte AVX unaligned load and store if needed.  */

static void
ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
{
  rtx m;
  rtx (*extract) (rtx, rtx, rtx);
  rtx (*move_unaligned) (rtx, rtx);
  enum machine_mode mode;

  switch (GET_MODE (op0))
    {
    default:
      gcc_unreachable ();
    case V32QImode:
      extract = gen_avx_vextractf128v32qi;
      move_unaligned = gen_avx_movdqu256;
      mode = V16QImode;
      break;
    case V8SFmode:
      extract = gen_avx_vextractf128v8sf;
      move_unaligned = gen_avx_movups256;
      mode = V4SFmode;
      break;
    case V4DFmode:
      extract = gen_avx_vextractf128v4df;
      move_unaligned = gen_avx_movupd256;
      mode = V2DFmode;
      break;
    }

  if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
    {
      rtx r = gen_reg_rtx (mode);
      m = adjust_address (op1, mode, 0);
      emit_move_insn (r, m);
      m = adjust_address (op1, mode, 16);
      r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
      emit_move_insn (op0, r);
    }
  else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
    {
      m = adjust_address (op0, mode, 0);
      emit_insn (extract (m, op1, const0_rtx));
      m = adjust_address (op0, mode, 16);
      emit_insn (extract (m, op1, const1_rtx));
    }
  else
    emit_insn (move_unaligned (op0, op1));
}
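/* With the split flags enabled, a misaligned 32-byte load such as
   "vmovups (%rax), %ymm0" becomes roughly

     vmovups     (%rax), %xmm0
     vinsertf128 $1, 16(%rax), %ymm0, %ymm0

   (the VEC_CONCAT above), and a misaligned store becomes two 16-byte
   halves via vextractf128.  */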
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         movhpd mem+8, reg
       }
 */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (TARGET_AVX)
    {
      switch (GET_MODE_CLASS (mode))
	{
	case MODE_VECTOR_INT:
	case MODE_INT:
	  switch (GET_MODE_SIZE (mode))
	    {
	    case 16:
	      /* If we're optimizing for size, movups is the smallest.  */
	      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
		{
		  op0 = gen_lowpart (V4SFmode, op0);
		  op1 = gen_lowpart (V4SFmode, op1);
		  emit_insn (gen_sse_movups (op0, op1));
		  return;
		}
	      op0 = gen_lowpart (V16QImode, op0);
	      op1 = gen_lowpart (V16QImode, op1);
	      emit_insn (gen_sse2_movdqu (op0, op1));
	      break;
	    case 32:
	      op0 = gen_lowpart (V32QImode, op0);
	      op1 = gen_lowpart (V32QImode, op1);
	      ix86_avx256_split_vector_move_misalign (op0, op1);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  break;
	case MODE_VECTOR_FLOAT:
	  op0 = gen_lowpart (mode, op0);
	  op1 = gen_lowpart (mode, op1);

	  switch (mode)
	    {
	    case V4SFmode:
	      emit_insn (gen_sse_movups (op0, op1));
	      break;
	    case V8SFmode:
	      ix86_avx256_split_vector_move_misalign (op0, op1);
	      break;
	    case V2DFmode:
	      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
		{
		  op0 = gen_lowpart (V4SFmode, op0);
		  op1 = gen_lowpart (V4SFmode, op1);
		  emit_insn (gen_sse_movups (op0, op1));
		  return;
		}
	      emit_insn (gen_sse2_movupd (op0, op1));
	      break;
	    case V4DFmode:
	      ix86_avx256_split_vector_move_misalign (op0, op1);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      return;
    }

  if (MEM_P (op1))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_insn_for_size_p ()
	  || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
	    {
	      op0 = gen_lowpart (V2DFmode, op0);
	      op1 = gen_lowpart (V2DFmode, op1);
	      emit_insn (gen_sse2_movupd (op0, op1));
	      return;
	    }

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_clobber (op0);
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      op1 = gen_lowpart (V4SFmode, op1);
	      emit_insn (gen_sse_movups (op0, op1));
	      return;
	    }

	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_clobber (op0);

	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);
	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  else if (MEM_P (op0))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_insn_for_size_p ()
	  || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? Similar to above, only less clear because of quote
	 typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V2DFmode, op0);
	      op1 = gen_lowpart (V2DFmode, op1);
	      emit_insn (gen_sse2_movupd (op0, op1));
	    }
	  else
	    {
	      m = adjust_address (op0, DFmode, 0);
	      emit_insn (gen_sse2_storelpd (m, op1));
	      m = adjust_address (op0, DFmode, 8);
	      emit_insn (gen_sse2_storehpd (m, op1));
	    }
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);

	  if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      emit_insn (gen_sse_movups (op0, op1));
	    }
	  else
	    {
	      m = adjust_address (op0, V2SFmode, 0);
	      emit_insn (gen_sse_storelps (m, op1));
	      m = adjust_address (op0, V2SFmode, 8);
	      emit_insn (gen_sse_storehps (m, op1));
	    }
	}
    }
  else
    gcc_unreachable ();
}
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);

  /* When we push an operand onto stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
  emit_move_insn (tmp, x);
}
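/* The expansion is simply an explicit stack adjustment followed by a
   store, e.g. for a 16-byte mode on x86_64 roughly

     sub $16, %rsp
     mov ..., (%rsp)

   instead of a push instruction.  */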
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp;

      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

      temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
	{
	  src2 = force_reg (mode, src2);
	  src1 = src2;
	}
      else
	src2 = force_reg (mode, src2);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  /* Improve address combine.  */
  if (code == PLUS
      && GET_MODE_CLASS (mode) == MODE_INT
      && MEM_P (src2))
    src2 = force_reg (mode, src2);

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else if (reload_completed
	   && code == PLUS
	   && !rtx_equal_p (dst, src1))
    {
      /* This is going to be an LEA; avoid splitting it later.  */
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

bool
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
			 rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return false;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return false;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return false;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
    return (code == AND
	    && (mode == HImode
		|| mode == SImode
		|| (TARGET_64BIT && mode == DImode))
	    && satisfies_constraint_L (src2));

  return true;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */

void
ix86_split_idivmod (enum machine_mode mode, rtx operands[],
		    bool signed_p)
{
  rtx end_label, qimode_label;
  rtx insn, div, mod;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

  switch (mode)
    {
    case SImode:
      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
      gen_test_ccno_1 = gen_testsi_ccno_1;
      gen_zero_extend = gen_zero_extendqisi2;
      break;
    case DImode:
      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;
      break;
    default:
      gcc_unreachable ();
    }

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
				 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
			       gen_rtx_LABEL_REF (VOIDmode, qimode_label),
			       pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate original signed/unsigned divmod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
		       operands[2], operands[3]);
  emit_insn (div);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

  if (signed_p)
    {
      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
    }
  else
    {
      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
    }

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
  else
    {
      /* Need a new scratch register since the old one has result
	 of 8bit divide.  */
      scratch = gen_reg_rtx (mode);
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
    }
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
}
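/* The generated code checks both operands against 255 in one go by
   OR-ing them and testing the bits above bit 7; for SImode this looks
   roughly like

       orl   op3, scratch
       testl $-256, scratch
       je    .Lqimode
       idivl op3                  # full-width divide
       jmp   .Lend
     .Lqimode:
       divb  op3b                 # AL = quotient, AH = remainder
     .Lend:

   (register choices illustrative only; the real operands come from
   the divmod pattern).  */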
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to the next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx prev, rtx next, unsigned int distance)
{
  df_ref *use_rec;
  df_ref *def_rec;

  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
    for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
      if (!DF_REF_IS_ARTIFICIAL (*def_rec)
	  && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
	return distance + (distance & 1) + 2;

  return distance + 1;
}
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

static bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
		  rtx insn)
{
  df_ref *def_rec;

  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
    if (DF_REF_REG_DEF_P (*def_rec)
	&& !DF_REF_IS_ARTIFICIAL (*def_rec)
	&& (regno1 == DF_REF_REGNO (*def_rec)
	    || regno2 == DF_REF_REGNO (*def_rec)))
      return true;

  return false;
}
/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref *use_rec;

  for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
    if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
      return true;

  return false;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx insn, int distance,
			       rtx start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx prev = start;
  rtx next = NULL;

  *found = false;

  while (prev
	 && prev != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, prev))
	    {
	      if (recog_memoized (prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
		{
		  *found = true;
		  return distance;
		}
	    }

	  next = prev;
	}
      if (prev == BB_HEAD (bb))
	break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbour BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, PREV_INSN (insn),
					      &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  insn, distance,
						  BB_END (bb), &found);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      int bb_dist
		= distance_non_agu_define_in_bb (regno1, regno2,
						 insn, distance,
						 BB_END (e->src),
						 &found_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  if (!found)
    return -1;

  return distance >> 1;
}
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
			rtx insn, int distance, rtx start,
			bool *found, bool *redefined)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx next = start;
  rtx prev = NULL;

  *found = false;
  *redefined = false;

  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance(prev, next, distance);
	  if (insn_uses_reg_mem (regno, next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (next);
    }

  return distance;
}
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
				       NEXT_INSN (insn),
				       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_agu_use_in_bb (regno0, insn,
					   distance, BB_HEAD (bb),
					   &found, &redefined);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int bb_dist
		= distance_agu_use_in_bb (regno0, insn,
					  distance, BB_HEAD (e->dest),
					  &found_in_bb, &redefined_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found || redefined)
    return -1;

  return distance >> 1;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, unsigned int split_cost)
{
  int dist_define, dist_use;

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non lea variants have same priority.  Currently
	 we prefer lea for 64 bit code and non lea on 32 bit
	 code.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
	return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost does not exceed AGU stall.  */
  if (dist_use < 0)
    return dist_define >= LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
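/* Example (ignoring split_cost and IX86_LEA_PRIORITY): on an
   AGU-stalling core such as Atom, if an operand of the lea was produced
   by an ALU instruction one cycle earlier (dist_define = 1) and the lea
   result feeds an address two cycles later (dist_use = 2), then
   dist_define < dist_use, the lea does not outperform, and the callers
   below will split it into ALU instructions.  */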
/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

static bool
ix86_ok_to_clobber_flags (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  df_ref *use;
  bitmap live;

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn))
	{
	  for (use = DF_INSN_USES (insn); *use; use++)
	    if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
	      return false;

	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
	    return true;
	}

      if (insn == BB_END (bb))
	break;

      insn = NEXT_INSN (insn);
    }

  live = df_get_live_out(bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
}
/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  /* We need to split only adds with non destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;
  else
    return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
}
/* Return true if we should emit lea instruction instead of mov
   instruction.  */

bool
ix86_use_lea_for_mov (rtx insn, rtx operands[])
{
  unsigned int regno0;
  unsigned int regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, -1, 0);
}
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = -1;
  unsigned int regno2 = -1;
  unsigned int split_cost = 0;
  struct ix86_address parts;
  int ok;

  /* Check we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Compute how many cycles we will add to execution time
     if we split the lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 4;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
   matches destination.  RTX includes clobber of FLAGS_REG.  */

static void
ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
		 rtx dst, rtx src)
{
  rtx op, clob;

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
/* Split lea instructions into a sequence of instructions
   which are executed on ALU to avoid AGU stalls.
   It is assumed that it is allowed to clobber flags register
   at lea position.  */

void
ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = INVALID_REGNUM;
  unsigned int regno2 = INVALID_REGNUM;
  struct ix86_address parts;
  rtx tmp;
  int ok, adds;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  if (parts.base)
    {
      if (GET_MODE (parts.base) != mode)
	parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
      regno1 = true_regnum (parts.base);
    }

  if (parts.index)
    {
      if (GET_MODE (parts.index) != mode)
	parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
      regno2 = true_regnum (parts.index);
    }

  if (parts.scale > 1)
    {
      /* Case r1 = r1 + ...  */
      if (regno1 == regno0)
	{
	  /* If we have a case r1 = r1 + C * r1 then we
	     should use multiplication which is very
	     expensive.  Assume cost model is wrong if we
	     have such case here.  */
	  gcc_assert (regno2 != regno0);

	  for (adds = parts.scale; adds > 0; adds--)
	    ix86_emit_binop (PLUS, mode, operands[0], parts.index);
	}
      else
	{
	  /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));

	  /* Use shift for scaling.  */
	  ix86_emit_binop (ASHIFT, mode, operands[0],
			   GEN_INT (exact_log2 (parts.scale)));

	  if (parts.base)
	    ix86_emit_binop (PLUS, mode, operands[0], parts.base);

	  if (parts.disp && parts.disp != const0_rtx)
	    ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
	}
    }
  else if (!parts.base && !parts.index)
    {
      gcc_assert(parts.disp);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
    }
  else
    {
      if (!parts.base)
	{
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
	}
      else if (!parts.index)
	{
	  if (regno0 != regno1)
	    emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
	}
      else
	{
	  if (regno0 == regno1)
	    tmp = parts.index;
	  else if (regno0 == regno2)
	    tmp = parts.base;
	  else
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
	      tmp = parts.index;
	    }

	  ix86_emit_binop (PLUS, mode, operands[0], tmp);
	}

      if (parts.disp && parts.disp != const0_rtx)
	ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
    }
}
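/* For instance, "lea 4(%ebx,%ecx,8), %eax" can be rewritten as

     movl %ecx, %eax
     sall $3, %eax
     addl %ebx, %eax
     addl $4, %eax

   which keeps the work on the ALU ports at the cost of more
   instructions.  */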
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count)
	  && true_regnum (set_dest) == true_regnum (shift_count))
	return true;
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
				       PATTERN (use_insn));
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}
/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
        emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
        emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
        emit_insn (gen_sse_movss (value, value, input));
      else
        emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_fix_truncv4sfv4si2 (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}
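/* Net effect of ix86_split_convert_uns_si_sse: inputs below 0x1p31
   are truncated directly by the signed conversion; inputs at or above
   0x1p31 first have 0x1p31 subtracted (via the masked zero_or_two31
   operand) and then get bit 31 restored by the final xor with the
   mask shifted into the sign position.  */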
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
                            gen_rtvec (4, GEN_INT (0x43300000UL),
                                       GEN_INT (0x45300000UL),
                                       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
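/* Worked example of the bias trick in ix86_expand_convert_uns_didf_sse:
   for the input 2^32 + 5, the low word juxtaposed with 0x43300000
   reads as the double 0x1.0p52 + 5, and the high word juxtaposed with
   0x45300000 as 0x1.0p84 + 2^32.  Subtracting the two biases and
   summing the halves leaves exactly 4294967301.0.  */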
/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
                                  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
                           NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
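/* In ix86_expand_convert_uns_sidf_sse, adding -2147483648 flips the
   sign bit, so the unsigned input x is reinterpreted as the signed
   value x - 2^31; the signed conversion is then exact, and adding
   TWO31r (0x1.0p31) back recovers the original magnitude in DFmode.  */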
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi,
                               0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
                           0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
                                NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
                                NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
                               0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
                               0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
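/* Splitting the input into 16-bit halves keeps each floatsisf2
   conversion exact, since both halves fit in SFmode's 24-bit
   significand; the result is recombined as fp_hi * 0x1.0p16 + fp_lo,
   so only the final addition can round.  */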
/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
   a vector of unsigned ints VAL to vector of floats TARGET.  */

void
ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
{
  rtx tmp[8];
  REAL_VALUE_TYPE TWO16r;
  enum machine_mode intmode = GET_MODE (val);
  enum machine_mode fltmode = GET_MODE (target);
  rtx (*cvt) (rtx, rtx);

  if (intmode == V4SImode)
    cvt = gen_floatv4siv4sf2;
  else
    cvt = gen_floatv8siv8sf2;
  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
  tmp[0] = force_reg (intmode, tmp[0]);
  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
                                OPTAB_DIRECT);
  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
                                NULL_RTX, 1, OPTAB_DIRECT);
  tmp[3] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[3], tmp[1]));
  tmp[4] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[4], tmp[2]));
  real_ldexp (&TWO16r, &dconst1, 16);
  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
                                OPTAB_DIRECT);
  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
                                OPTAB_DIRECT);
  if (tmp[7] != target)
    emit_move_insn (target, tmp[7]);
}
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
   pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
   This is done by doing just signed conversion if < 0x1p31, and otherwise by
   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */

rtx
ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
  REAL_VALUE_TYPE TWO31r;
  rtx two31r, tmp[4];
  enum machine_mode mode = GET_MODE (val);
  enum machine_mode scalarmode = GET_MODE_INNER (mode);
  enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
  rtx (*cmp) (rtx, rtx, rtx, rtx);
  int i;

  for (i = 0; i < 3; i++)
    tmp[i] = gen_reg_rtx (mode);
  real_ldexp (&TWO31r, &dconst1, 31);
  two31r = const_double_from_real_value (TWO31r, scalarmode);
  two31r = ix86_build_const_vector (mode, 1, two31r);
  two31r = force_reg (mode, two31r);
  switch (mode)
    {
    case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
    case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
    case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
    case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
    default: gcc_unreachable ();
    }
  tmp[3] = gen_rtx_LE (mode, two31r, val);
  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
                                0, OPTAB_DIRECT);
  if (intmode == V4SImode || TARGET_AVX2)
    *xorp = expand_simple_binop (intmode, ASHIFT,
                                 gen_lowpart (intmode, tmp[0]),
                                 GEN_INT (31), NULL_RTX, 0,
                                 OPTAB_DIRECT);
  else
    {
      rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
      two31 = ix86_build_const_vector (intmode, 1, two31);
      *xorp = expand_simple_binop (intmode, AND,
                                   gen_lowpart (intmode, tmp[0]),
                                   two31, NULL_RTX, 0,
                                   OPTAB_DIRECT);
    }
  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
                              0, OPTAB_DIRECT);
}
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  enum machine_mode scalar_mode;

  switch (mode)
    {
    case V32QImode:
    case V16QImode:
    case V16HImode:
    case V8HImode:
    case V8SImode:
    case V4SImode:
    case V4DImode:
    case V2DImode:
      gcc_assert (vect);
    case V8SFmode:
    case V4SFmode:
    case V4DFmode:
    case V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
        RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case V8SImode:
    case V4SImode:
    case V8SFmode:
    case V4SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case V4DImode:
    case V2DImode:
    case V4DFmode:
    case V2DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
        lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        {
          imode = TImode;
          lo = 0, hi = (HOST_WIDE_INT)1 << shift;
        }
      else
        {
          rtvec vec;

          imode = DImode;
          lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

          if (invert)
            {
              lo = ~lo, hi = ~hi;
              v = constm1_rtx;
            }
          else
            v = const0_rtx;

          mask = immed_double_const (lo, hi, imode);

          vec = gen_rtvec (2, v, mask);
          v = gen_rtx_CONST_VECTOR (V2DImode, vec);
          v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

          return v;
        }
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
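/* For example, for DFmode ix86_build_signbit_mask produces the
   constant 0x8000000000000000 (or its complement 0x7fffffffffffffff
   when INVERT is true), replicated across the vector when VECT is
   true -- the masks consumed by the abs/neg and copysign expanders
   below.  */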
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
                                rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
        par = gen_rtvec (2, set, use);
      else
        {
          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
          par = gen_rtvec (3, set, use, clob);
        }
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}
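/* With the mask built above, SSE negation reduces to XORing the value
   with the sign-bit mask and SSE abs to ANDing with the inverted
   mask; in the scalar (non-vector) case the emitted PARALLEL also
   clobbers the flags register.  */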
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
        op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
        {
          if (op0 == CONST0_RTX (mode))
            op0 = CONST0_RTX (vmode);
          else
            {
              rtx v = ix86_build_const_vector (vmode, false, op0);

              op0 = force_reg (vmode, v);
            }
        }
      else if (op0 != CONST0_RTX (mode))
        op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
        copysign_insn = gen_copysigndf3_const;
      else
        copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
        copysign_insn = gen_copysigndf3_var;
      else
        copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
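/* Both arms implement copysign (op0, op1) as
   (magnitude & ~signbit) | (sign & signbit): the _const patterns have
   the magnitude pre-folded into a nonnegative constant, while the
   _var patterns carry both masks (NMASK and MASK) for the fully
   variable case, as the two splitters below show.  */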
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
         we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
        {
          x = gen_rtx_AND (vmode, scratch, mask);
        }
      else						/* alternative 2,4 */
        {
          gcc_assert (REGNO (mask) == REGNO (scratch));
          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
          x = gen_rtx_AND (vmode, scratch, op1);
        }
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
        {
          dest = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, nmask);
        }
      else						/* alternative 3,4 */
        {
          gcc_assert (REGNO (nmask) == REGNO (dest));
          dest = nmask;
          op0 = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, op0);
        }
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return false;
      /* FALLTHRU */
    case CCZmode:
      break;

    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
      if (set_mode != req_mode)
        return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
          && rtx_equal_p (op1, XEXP (op0, 0)))
        return CCCmode;
      return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == MINUS
          && rtx_equal_p (op1, XEXP (op0, 0)))
        return CCCmode;
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For other cases Carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
         mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
    return m2;
  else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
    case CCZmode:
      switch (m2)
        {
        default:
          gcc_unreachable ();

        case CCmode:
        case CCGCmode:
        case CCGOCmode:
        case CCNOmode:
        case CCAmode:
        case CCCmode:
        case CCOmode:
        case CCSmode:
        case CCZmode:
          return CCmode;
        }

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
         checked above.  */
      return VOIDmode;
    }
}
/* Return a comparison we can do and that it is equivalent to
   swap_condition (code) apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:			/* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:			/* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:			/* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}
/* Return cost of comparison CODE using the best strategy for performance.
   All following functions use the number of instructions as a cost metric.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      arith_cost = 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE:
    case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}
/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* Do fcomi/sahf based test when profitable.  */
  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || (op_mode == XFmode
              && ! (standard_80387_constant_p (op0) == 1
                    || standard_80387_constant_p (op1) == 1)
              && GET_CODE (op1) != FLOAT)
          || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (MEM_P (op0)
              && ! (standard_80387_constant_p (op1) == 0
                    || MEM_P (op1))))
        {
          enum rtx_code new_code = ix86_fp_swap_condition (code);
          if (new_code != UNKNOWN)
            {
              rtx tmp;
              tmp = op0, op0 = op1, op1 = tmp;
              code = new_code;
            }
        }

      if (!REG_P (op0))
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          int tmp = standard_80387_constant_p (op1);
          if (tmp == 0)
            op1 = validize_mem (force_const_mem (op_mode, op1));
          else if (tmp == 1)
            {
              if (TARGET_CMOVE)
                op1 = force_reg (op_mode, op1);
            }
          else
            op1 = force_reg (op_mode, op1);
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                         tmp);
      emit_insn (tmp);
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                         tmp);

      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}
rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);

  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx tmp;

  switch (mode)
    {
    case SFmode:
    case DFmode:
    case XFmode:
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case DImode:
      if (TARGET_64BIT)
        goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;
        enum machine_mode submode;

        if (CONSTANT_P (op0) && !CONSTANT_P (op1))
          {
            tmp = op0, op0 = op1, op1 = tmp;
            code = swap_condition (code);
          }

        split_double_mode (mode, &op0, 1, lo+0, hi+0);
        split_double_mode (mode, &op1, 1, lo+1, hi+1);

        submode = mode == DImode ? SImode : DImode;

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_insn_for_size_p ()
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (submode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_expand_branch (code, tmp, const0_rtx, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  Similarly for low word -1 and
           less-or-equal-than or greater-than.  */

        if (CONST_INT_P (hi[1]))
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              if (lo[1] == const0_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            case LE: case LEU: case GT: case GTU:
              if (lo[1] == constm1_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = UNKNOWN; code2 = NE;  break;
          case NE:   code2 = UNKNOWN; break;

          default:
            gcc_unreachable ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        if (code1 != UNKNOWN)
          ix86_expand_branch (code1, hi[0], hi[1], label);
        if (code2 != UNKNOWN)
          ix86_expand_branch (code2, hi[0], hi[1], label2);

        ix86_expand_branch (code3, lo[0], lo[1], label);

        if (code2 != UNKNOWN)
          emit_label (label2);
        return;
      }

    default:
      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
      goto simple;
    }
}
/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
                      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx condition;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (split_branch_probability >= 0)
    add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
}
void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
}
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle double-mode compares that go through special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx compare_op, compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut:  following common codes never translate
         into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
          || code == ORDERED || code == UNORDERED)
        return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
          && !TARGET_IEEE_FP)
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = swap_condition (code);
        }

      /* Try to expand the comparison and verify that we end up with
         carry flag based comparison.  This fails to be true only when
         we decide to expand comparison using arithmetic that is not
         too common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
      compare_seq = get_insns ();
      end_sequence ();

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
          || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
        code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
        return false;

      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
        return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
        {
          op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We still can swap operands but that
             would force loading of the constant into register.  */
          if (op1 == const0_rtx
              || !x86_64_immediate_operand (op1, GET_MODE (op1)))
            return false;
          code = (code == GTU ? GEU : LTU);
        }
      else
        {
          rtx tmp = op1;
          op1 = op0;
          op0 = tmp;
          code = (code == GTU ? LTU : GEU);
        }
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
        return false;
      op0 = force_reg (mode, op0);
    }
  *pop = ix86_expand_compare (code, op0, op1);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}
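/* Examples of the conversions above: "a == 0" becomes
   "(unsigned) a < 1", and "a >= 0" becomes
   "(unsigned) a < 0x80000000" (for 32-bit modes) -- both reduce to
   LTU/GEU tests that read only the carry flag.  */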
bool
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  start_sequence ();
  compare_op = ix86_expand_compare (code, op0, op1);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((op1 == const0_rtx && (code == GE || code == LT))
      || (op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && CONST_INT_P (operands[2])
      && CONST_INT_P (operands[3]))
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than we do by
         using sbb.  */
      if (sign_bit_compare_p
          || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
        {
          /* Detect overlap between destination and compare sources.  */
          rtx tmp = out;

          if (!sign_bit_compare_p)
            {
              rtx flags;
              bool fpcmp = false;

              compare_code = GET_CODE (compare_op);

              flags = XEXP (compare_op, 0);

              if (GET_MODE (flags) == CCFPmode
                  || GET_MODE (flags) == CCFPUmode)
                {
                  fpcmp = true;
                  compare_code
                    = ix86_fp_compare_code_to_integer (compare_code);
                }

              /* To simplify rest of code, restrict to the GEU case.  */
              if (compare_code == LTU)
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  compare_code = reverse_condition (compare_code);
                  code = reverse_condition (code);
                }
              else
                {
                  if (fpcmp)
                    PUT_CODE (compare_op,
                              reverse_condition_maybe_unordered
                                (GET_CODE (compare_op)));
                  else
                    PUT_CODE (compare_op,
                              reverse_condition (GET_CODE (compare_op)));
                }
              diff = ct - cf;

              if (reg_overlap_mentioned_p (out, op0)
                  || reg_overlap_mentioned_p (out, op1))
                tmp = gen_reg_rtx (mode);

              if (mode == DImode)
                emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
              else
                emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
                                                 flags, compare_op));
            }
          else
            {
              if (code == GT || code == GE)
                code = reverse_condition (code);
              else
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  diff = ct - cf;
                }
              tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
            }

          if (diff == 1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [addl dest, ct]
               *
               * Size 5 - 8.
               */
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (cf == -1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * orl $ct, dest
               *
               * Size 8.
               */
              tmp = expand_simple_binop (mode, IOR,
                                         tmp, GEN_INT (ct),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (diff == -1 && ct)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * notl dest
               * [addl dest, cf]
               *
               * Size 8 - 11.
               */
              tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
              if (cf)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (cf),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [notl dest]
               * andl cf - ct, dest
               * [addl dest, ct]
               *
               * Size 8 - 11.
               */

              if (cf == 0)
                {
                  cf = ct;
                  ct = 0;
                  tmp = expand_simple_unop (mode, NOT, tmp,
                                            copy_rtx (tmp), 1);
                }

              tmp = expand_simple_binop (mode, AND,
                                         copy_rtx (tmp),
                                         gen_int_mode (cf - ct, mode),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }

          if (!rtx_equal_p (tmp, out))
            emit_move_insn (copy_rtx (out), copy_rtx (tmp));

          return true;
        }

      if (diff < 0)
        {
          enum machine_mode cmp_mode = GET_MODE (op0);
          HOST_WIDE_INT tmp;

          tmp = ct, ct = cf, cf = tmp;
          diff = -diff;

          if (SCALAR_FLOAT_MODE_P (cmp_mode))
            {
              gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

              /* We may be reversing unordered compare to normal compare, that
                 is not valid in general (we may convert non-trapping condition
                 to trapping one), however on i386 we currently emit all
                 comparisons unordered.  */
              compare_code = reverse_condition_maybe_unordered (compare_code);
              code = reverse_condition_maybe_unordered (code);
            }
          else
            {
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
        }

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
          && CONST_INT_P (op1))
        {
          if (op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (op1 == constm1_rtx)
            {
              if (code == LE)
                compare_code = LT;
              else if (code == GT)
                compare_code = GE;
            }
        }

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
          && GET_MODE (op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If lea code below could be used, only optimize
             if it results in a 2 insn sequence.  */

          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1	(if necessary)
               * sarl $31, op1
               * orl cf, op1
               */
              if (ct != -1)
                {
                  cf = ct;
                  ct = -1;
                  code = reverse_condition (code);
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, GEN_INT (cf),
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return true;
            }
        }

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
          && (mode != DImode
              || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
        {
          /*
           * xorl dest,dest
           * cmpl op1,op2
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * Size 14.
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx tmp;
          int nops;

          out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

          nops = 0;
          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get arithmetics done in proper mode to match.  */
          if (diff == 1)
            tmp = copy_rtx (out);
          else
            {
              rtx out1;
              out1 = copy_rtx (out);
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
              nops++;
              if (diff & 1)
                {
                  tmp = gen_rtx_PLUS (mode, tmp, out1);
                  nops++;
                }
            }
          if (cf != 0)
            {
              tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
              nops++;
            }
          if (!rtx_equal_p (tmp, out))
            {
              if (nops == 1)
                out = force_operand (tmp, copy_rtx (out));
              else
                emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out),
                                        copy_rtx (tmp)));
            }
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
          && BRANCH_COST (optimize_insn_for_speed_p (),
                          false) >= 2)
        {
          if (cf == 0)
            {
              enum machine_mode cmp_mode = GET_MODE (op0);

              cf = ct;
              ct = 0;

              if (SCALAR_FLOAT_MODE_P (cmp_mode))
                {
                  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

                  /* We may be reversing unordered compare to normal compare,
                     that is not valid in general (we may convert non-trapping
                     condition to trapping one), however on i386 we currently
                     emit all comparisons unordered.  */
                  code = reverse_condition_maybe_unordered (code);
                }
              else
                {
                  code = reverse_condition (code);
                  if (compare_code != UNKNOWN)
                    compare_code = reverse_condition (compare_code);
                }
            }

          if (compare_code != UNKNOWN)
            {
              /* notl op1	(if needed)
                 sarl $31, op1
                 andl (cf-ct), op1
                 addl ct, op1

                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 complement.
                 True/false will be -1/0 while code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */

              if (compare_code == GE || !cf)
                {
                  code = reverse_condition (code);
                  compare_code = LT;
                }
              else
                {
                  HOST_WIDE_INT tmp = cf;
                  cf = ct;
                  ct = tmp;
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
            }
          else
            {
              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

              out = expand_simple_binop (mode, PLUS, copy_rtx (out),
                                         constm1_rtx,
                                         copy_rtx (out), 1, OPTAB_DIRECT);
            }

          out = expand_simple_binop (mode, AND, copy_rtx (out),
                                     gen_int_mode (cf - ct, mode),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          if (ct)
            out = expand_simple_binop (mode, PLUS, copy_rtx (out),
                                       GEN_INT (ct),
                                       copy_rtx (out), 1, OPTAB_DIRECT);
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
        return false;

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (CONST_INT_P (operands[2]))
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else if (CONST_INT_P (operands[3]))
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else
        return false;

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return false;

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
        emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return true;
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
          || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
                                                operands[3])));
  return true;
}
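/* The key idiom in the constant/constant path of ix86_expand_int_movcc
   is "cmpl op0,op1; sbbl dest,dest", which broadcasts the carry flag
   into an all-zeros or all-ones DEST; the constants ct/cf are then
   reconstructed from that value with at most a not/and/or/add, as the
   inline size comments above enumerate.  */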
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
                                  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* AVX supports all the needed comparisons.  */
      if (TARGET_AVX)
        break;
      /* We have no LTGT as an operator.  We could implement it with
         NE & ORDERED, but this requires an extra temporary.  It's
         not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* AVX has 3 operand comparisons, no need to swap anything.  */
      if (TARGET_AVX)
        break;
      /* For commutative operators, try to canonicalize the destination
         operand to be first in the comparison - this helps reload to
         avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
        break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly before AVX, and furthermore
         ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
         comparison operands to transform into something that is
         supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
                           rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}
/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
                     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  enum machine_mode cmp_mode = GET_MODE (cmp_op0);
  rtx x;

  cmp_op0 = force_reg (cmp_mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, cmp_mode))
    cmp_op1 = force_reg (cmp_mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
  if (cmp_mode != mode)
    {
      x = force_reg (cmp_mode, x);
      convert_move (dest, x, false);
    }
  else
    emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (vector_all_ones_operand (op_true, mode)
      && rtx_equal_p (op_false, CONST0_RTX (mode)))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
    }
  else if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_IOR (mode, cmp, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (TARGET_XOP)
    {
      op_true = force_reg (mode, op_true);

      if (!nonimmediate_operand (op_false, mode))
        op_false = force_reg (mode, op_false);

      emit_insn (gen_rtx_SET (mode, dest,
                              gen_rtx_IF_THEN_ELSE (mode, cmp,
                                                    op_true,
                                                    op_false)));
    }
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;

      if (!nonimmediate_operand (op_true, mode))
        op_true = force_reg (mode, op_true);

      op_false = force_reg (mode, op_false);

      switch (mode)
        {
        case V4SFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvps;
          break;
        case V2DFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvpd;
          break;
        case V16QImode:
        case V8HImode:
        case V4SImode:
        case V2DImode:
          if (TARGET_SSE4_1)
            {
              gen = gen_sse4_1_pblendvb;
              dest = gen_lowpart (V16QImode, dest);
              op_false = gen_lowpart (V16QImode, op_false);
              op_true = gen_lowpart (V16QImode, op_true);
              cmp = gen_lowpart (V16QImode, cmp);
            }
          break;
        case V8SFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvps256;
          break;
        case V4DFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvpd256;
          break;
        case V32QImode:
        case V16HImode:
        case V8SImode:
        case V4DImode:
          if (TARGET_AVX2)
            {
              gen = gen_avx2_pblendvb;
              dest = gen_lowpart (V32QImode, dest);
              op_false = gen_lowpart (V32QImode, op_false);
              op_true = gen_lowpart (V32QImode, op_true);
              cmp = gen_lowpart (V32QImode, cmp);
            }
          break;
        default:
          break;
        }

      if (gen != NULL)
        emit_insn (gen (dest, op_false, op_true, cmp));
      else
        {
          op_true = force_reg (mode, op_true);

          t2 = gen_reg_rtx (mode);
          if (optimize)
            t3 = gen_reg_rtx (mode);
          else
            t3 = dest;

          x = gen_rtx_AND (mode, op_true, cmp);
          emit_insn (gen_rtx_SET (VOIDmode, t2, x));

          x = gen_rtx_NOT (mode, cmp);
          x = gen_rtx_AND (mode, x, op_false);
          emit_insn (gen_rtx_SET (VOIDmode, t3, x));

          x = gen_rtx_IOR (mode, t3, t2);
          emit_insn (gen_rtx_SET (VOIDmode, dest, x));
        }
    }
}
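/* In the generic fallback of ix86_expand_sse_movcc, the blend is the
   classic mask identity dest = (cmp & op_true) | (~cmp & op_false);
   the SSE4.1/AVX branches map the same operation onto the hardware
   blendv instructions instead.  */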
/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
         allocation just to gain access to it.  Deny movcc when the
         comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
        cmode = GET_MODE (op1);
      if (cmode != mode)
        return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
        return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
                                     operands[2], operands[3]))
        return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
                                 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
                                                operands[2], operands[3])));

  return true;
}
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                           &operands[4], &operands[5]);
  if (code == UNKNOWN)
    {
      rtx temp;
      switch (GET_CODE (operands[3]))
        {
        case LTGT:
          temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = AND;
          break;
        case UNEQ:
          temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = IOR;
          break;
        default:
          gcc_unreachable ();
        }
      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
                                 OPTAB_DIRECT);
      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
      return true;
    }

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
                                 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
                             operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}
/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode data_mode = GET_MODE (operands[0]);
  enum machine_mode mode = GET_MODE (operands[4]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
  if ((code == LT || code == GE)
      && data_mode == mode
      && cop1 == CONST0_RTX (mode)
      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
      && (GET_MODE_SIZE (data_mode) == 16
	  || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
    {
      rtx negop = operands[2 - (code == LT)];
      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
      if (negop == CONST1_RTX (data_mode))
	{
	  rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
					 operands[0], 1, OPTAB_DIRECT);
	  if (res != operands[0])
	    emit_move_insn (operands[0], res);
	  return true;
	}
      else if (GET_MODE_INNER (data_mode) != DImode
	       && vector_all_ones_operand (negop, data_mode))
	{
	  rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
					 operands[0], 0, OPTAB_DIRECT);
	  if (res != operands[0])
	    emit_move_insn (operands[0], res);
	  return true;
	}
    }
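  /* For illustration (editorial, not original code): in V4SImode,
     x < 0 ? -1 : 0 becomes a single psrad $31 (the arithmetic shift
     smears the sign bit across the element) and x < 0 ? 1 : 0 a single
     psrld $31 (the logical shift leaves just the sign bit).  E.g. the
     element 0x80000001 yields 0xffffffff resp. 0x00000001.  */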
  if (!nonimmediate_operand (cop1, mode))
    cop1 = force_reg (mode, cop1);
  if (!general_operand (operands[1], data_mode))
    operands[1] = force_reg (data_mode, operands[1]);
  if (!general_operand (operands[2], data_mode))
    operands[2] = force_reg (data_mode, operands[2]);

  /* XOP supports all of the comparisons on all 128-bit vector int types.  */
  if (TARGET_XOP
      && (mode == V16QImode || mode == V8HImode
	  || mode == V4SImode || mode == V2DImode))
    ;
  else
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
	{
	case EQ:
	case GT:
	case GTU:
	  break;

	case NE:
	case LE:
	case LEU:
	  code = reverse_condition (code);
	  negate = true;
	  break;

	case GE:
	case GEU:
	  code = reverse_condition (code);
	  negate = true;
	  /* FALLTHRU */

	case LT:
	case LTU:
	  code = swap_condition (code);
	  x = cop0, cop0 = cop1, cop1 = x;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
	{
	  switch (code)
	    {
	    case EQ:
	      /* SSE4.1 supports EQ.  */
	      if (!TARGET_SSE4_1)
		return false;
	      break;

	    case GT:
	    case GTU:
	      /* SSE4.2 supports GT/GTU.  */
	      if (!TARGET_SSE4_2)
		return false;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}

      /* Unsigned parallel compare is not supported by the hardware.
	 Play some tricks to turn this into a signed comparison
	 against 0.  */
      if (code == GTU)
	{
	  cop0 = force_reg (mode, cop0);

	  switch (mode)
	    {
	    case V8SImode:
	    case V4DImode:
	    case V4SImode:
	    case V2DImode:
	      {
		rtx t1, t2, mask;
		rtx (*gen_sub3) (rtx, rtx, rtx);

		switch (mode)
		  {
		  case V8SImode: gen_sub3 = gen_subv8si3; break;
		  case V4DImode: gen_sub3 = gen_subv4di3; break;
		  case V4SImode: gen_sub3 = gen_subv4si3; break;
		  case V2DImode: gen_sub3 = gen_subv2di3; break;
		  default:
		    gcc_unreachable ();
		  }
		/* Subtract (-(INT MAX) - 1) from both operands to make
		   them signed.  */
		mask = ix86_build_signbit_mask (mode, true, false);
		t1 = gen_reg_rtx (mode);
		emit_insn (gen_sub3 (t1, cop0, mask));
		t2 = gen_reg_rtx (mode);
		emit_insn (gen_sub3 (t2, cop1, mask));
		cop0 = t1;
		cop1 = t2;
		code = GT;
	      }
	      break;

	    case V32QImode:
	    case V16HImode:
	    case V16QImode:
	    case V8HImode:
	      /* Perform a parallel unsigned saturating subtraction.  */
	      x = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, x,
				      gen_rtx_US_MINUS (mode, cop0, cop1)));
	      cop0 = x;
	      cop1 = CONST0_RTX (mode);
	      code = EQ;
	      negate = !negate;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
    }

  /* Allow the comparison to be done in one mode, but the movcc to
     happen in another mode.  */
  if (data_mode == mode)
    {
      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
			       operands[1+negate], operands[2-negate]);
    }
  else
    {
      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
      x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
			       code, cop0, cop1,
			       operands[1+negate], operands[2-negate]);
      x = gen_lowpart (data_mode, x);
    }

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
			 operands[2-negate]);
  return true;
}
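/* For illustration (editorial, not original code), the GTU lowering in
   8-bit terms: a >u b is equivalent to (a - 0x80) >s (b - 0x80), which
   is what subtracting the sign-bit mask from both operands achieves.
   Alternatively a >u b  <=>  (a -us b) != 0: the unsigned saturating
   subtraction is zero exactly when a <=u b, hence the EQ test combined
   with NEGATE.  */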
/* Expand a variable vector permutation.  */

void
ix86_expand_vec_perm (rtx operands[])
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx mask = operands[3];
  rtx t1, t2, t3, t4, vt, vt2, vec[32];
  enum machine_mode mode = GET_MODE (op0);
  enum machine_mode maskmode = GET_MODE (mask);
  int w, e, i;
  bool one_operand_shuffle = rtx_equal_p (op0, op1);

  /* Number of elements in the vector.  */
  w = GET_MODE_NUNITS (mode);
  e = GET_MODE_UNIT_SIZE (mode);
  gcc_assert (w <= 32);

  if (TARGET_AVX2)
    {
      if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
	{
	  /* Unfortunately, the VPERMQ and VPERMPD instructions only support
	     a constant shuffle operand.  With a tiny bit of effort we can
	     use VPERMD instead.  A re-interpretation stall for V4DFmode is
	     unfortunate but there's no avoiding it.
	     Similarly for V16HImode we don't have instructions for variable
	     shuffling, while for V32QImode we can, after preparing suitable
	     masks, use vpshufb; vpshufb; vpermq; vpor.  */

	  if (mode == V16HImode)
	    {
	      maskmode = mode = V32QImode;
	      w = 32;
	      e = 1;
	    }
	  else
	    {
	      maskmode = mode = V8SImode;
	      w = 8;
	      e = 4;
	    }
	  t1 = gen_reg_rtx (maskmode);

	  /* Replicate the low bits of the V4DImode mask into V8SImode:
	     mask = { A B C D }
	     t1 = { A A B B C C D D }.  */
	  for (i = 0; i < w / 2; ++i)
	    vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
	  vt = force_reg (maskmode, vt);
	  mask = gen_lowpart (maskmode, mask);
	  if (maskmode == V8SImode)
	    emit_insn (gen_avx2_permvarv8si (t1, vt, mask));
	  else
	    emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));

	  /* Multiply the shuffle indicies by two.  */
	  t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
				    OPTAB_DIRECT);

	  /* Add one to the odd shuffle indicies:
	     t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
	  for (i = 0; i < w / 2; ++i)
	    {
	      vec[i * 2] = const0_rtx;
	      vec[i * 2 + 1] = const1_rtx;
	    }
	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
	  vt = force_const_mem (maskmode, vt);
	  t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
				    OPTAB_DIRECT);

	  /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
	  operands[3] = mask = t1;
	  target = gen_lowpart (mode, target);
	  op0 = gen_lowpart (mode, op0);
	  op1 = gen_lowpart (mode, op1);
	}
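      /* For illustration (editorial, not original code): a V4DImode
	 selector { 3 0 2 1 } is widened to the V8SImode selector
	 { 6 7, 0 1, 4 5, 2 3 }: each index d becomes the pair
	 2*d, 2*d+1, selecting the two SImode halves of double-word
	 element d.  */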
      switch (mode)
	{
	case V8SImode:
	  /* The VPERMD and VPERMPS instructions already properly ignore
	     the high bits of the shuffle elements.  No need for us to
	     perform an AND ourselves.  */
	  if (one_operand_shuffle)
	    emit_insn (gen_avx2_permvarv8si (target, mask, op0));
	  else
	    {
	      t1 = gen_reg_rtx (V8SImode);
	      t2 = gen_reg_rtx (V8SImode);
	      emit_insn (gen_avx2_permvarv8si (t1, mask, op0));
	      emit_insn (gen_avx2_permvarv8si (t2, mask, op1));
	      goto merge_two;
	    }
	  return;

	case V8SFmode:
	  mask = gen_lowpart (V8SFmode, mask);
	  if (one_operand_shuffle)
	    emit_insn (gen_avx2_permvarv8sf (target, mask, op0));
	  else
	    {
	      t1 = gen_reg_rtx (V8SFmode);
	      t2 = gen_reg_rtx (V8SFmode);
	      emit_insn (gen_avx2_permvarv8sf (t1, mask, op0));
	      emit_insn (gen_avx2_permvarv8sf (t2, mask, op1));
	      goto merge_two;
	    }
	  return;

	case V4SImode:
	  /* By combining the two 128-bit input vectors into one 256-bit
	     input vector, we can use VPERMD and VPERMPS for the full
	     two-operand shuffle.  */
	  t1 = gen_reg_rtx (V8SImode);
	  t2 = gen_reg_rtx (V8SImode);
	  emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
	  emit_insn (gen_avx2_permvarv8si (t1, t2, t1));
	  emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
	  return;

	case V4SFmode:
	  t1 = gen_reg_rtx (V8SFmode);
	  t2 = gen_reg_rtx (V8SFmode);
	  mask = gen_lowpart (V4SFmode, mask);
	  emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
	  emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
	  emit_insn (gen_avx2_permvarv8sf (t1, t2, t1));
	  emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
	  return;

	case V32QImode:
	  t1 = gen_reg_rtx (V32QImode);
	  t2 = gen_reg_rtx (V32QImode);
	  t3 = gen_reg_rtx (V32QImode);
	  vt2 = GEN_INT (128);
	  for (i = 0; i < 32; i++)
	    vec[i] = vt2;
	  vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
	  vt = force_reg (V32QImode, vt);
	  for (i = 0; i < 32; i++)
	    vec[i] = i < 16 ? vt2 : const0_rtx;
	  vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
	  vt2 = force_reg (V32QImode, vt2);
	  /* From mask create two adjusted masks, which contain the same
	     bits as mask in the low 7 bits of each vector element.
	     The first mask will have the most significant bit clear
	     if it requests element from the same 128-bit lane
	     and MSB set if it requests element from the other 128-bit lane.
	     The second mask will have the opposite values of the MSB,
	     and additionally will have its 128-bit lanes swapped.
	     E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
	     t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
	     t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
	     stands for other 12 bytes.  */
	  /* The bit whether element is from the same lane or the other
	     lane is bit 4, so shift it up by 3 to the MSB position.  */
	  emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
				    gen_lowpart (V4DImode, mask),
				    GEN_INT (3)));
	  /* Clear MSB bits from the mask just in case it had them set.  */
	  emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
	  /* After this t1 will have MSB set for elements from other lane.  */
	  emit_insn (gen_xorv32qi3 (t1, t1, vt2));
	  /* Clear bits other than MSB.  */
	  emit_insn (gen_andv32qi3 (t1, t1, vt));
	  /* Or in the lower bits from mask into t3.  */
	  emit_insn (gen_iorv32qi3 (t3, t1, t2));
	  /* And invert MSB bits in t1, so MSB is set for elements from the
	     same lane.  */
	  emit_insn (gen_xorv32qi3 (t1, t1, vt));
	  /* Swap 128-bit lanes in t3.  */
	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					  gen_lowpart (V4DImode, t3),
					  const2_rtx, GEN_INT (3),
					  const0_rtx, const1_rtx));
	  /* And or in the lower bits from mask into t1.  */
	  emit_insn (gen_iorv32qi3 (t1, t1, t2));
	  if (one_operand_shuffle)
	    {
	      /* Each of these shuffles will put 0s in places where
		 element from the other 128-bit lane is needed, otherwise
		 will shuffle in the requested value.  */
	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
	      emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
	      /* For t3 the 128-bit lanes are swapped again.  */
	      emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					      gen_lowpart (V4DImode, t3),
					      const2_rtx, GEN_INT (3),
					      const0_rtx, const1_rtx));
	      /* And oring both together leads to the result.  */
	      emit_insn (gen_iorv32qi3 (target, t1, t3));
	      return;
	    }
	  else
	    {
	      t4 = gen_reg_rtx (V32QImode);
	      /* Similarly to the above one_operand_shuffle code,
		 just repeated twice for each operand.  The merge_two:
		 code will merge the two results together.  */
	      emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
	      emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
	      emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
	      emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
					      gen_lowpart (V4DImode, t4),
					      const2_rtx, GEN_INT (3),
					      const0_rtx, const1_rtx));
	      emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					      gen_lowpart (V4DImode, t3),
					      const2_rtx, GEN_INT (3),
					      const0_rtx, const1_rtx));
	      emit_insn (gen_iorv32qi3 (t4, t2, t4));
	      emit_insn (gen_iorv32qi3 (t3, t1, t3));
	      t1 = t4;
	      t2 = t3;
	      goto merge_two;
	    }

	default:
	  gcc_assert (GET_MODE_SIZE (mode) <= 16);
	  break;
	}
    }

  if (TARGET_XOP)
    {
      /* The XOP VPPERM insn supports three inputs.  By ignoring the
	 one_operand_shuffle special case, we avoid creating another
	 set of constant vectors in memory.  */
      one_operand_shuffle = false;

      /* mask = mask & {2*w-1, ...} */
      vt = GEN_INT (2*w - 1);
    }
  else
    {
      /* mask = mask & {w-1, ...} */
      vt = GEN_INT (w - 1);
    }

  for (i = 0; i < w; i++)
    vec[i] = vt;
  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
  mask = expand_simple_binop (maskmode, AND, mask, vt,
			      NULL_RTX, 0, OPTAB_DIRECT);

  /* For non-QImode operations, convert the word permutation control
     into a byte permutation control.  */
  if (mode != V16QImode)
    {
      mask = expand_simple_binop (maskmode, ASHIFT, mask,
				  GEN_INT (exact_log2 (e)),
				  NULL_RTX, 0, OPTAB_DIRECT);

      /* Convert mask to vector of chars.  */
      mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));

      /* Replicate each of the input bytes into byte positions:
	 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
	 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
	 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
      for (i = 0; i < 16; ++i)
	vec[i] = GEN_INT (i/e * e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      if (TARGET_XOP)
	emit_insn (gen_xop_pperm (mask, mask, mask, vt));
      else
	emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));

      /* Convert it into the byte positions by doing
	 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...}  */
      for (i = 0; i < 16; ++i)
	vec[i] = GEN_INT (i % e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      emit_insn (gen_addv16qi3 (mask, mask, vt));
    }
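  /* For illustration (editorial, not original code): in V8HImode
     (e = 2) a word selector { 1, 0, ... } becomes the byte selector
     { 2,3, 0,1, ... }.  The shift by log2(e) turns each word index i
     into 2*i, the replicating shuffle copies that value into both byte
     positions of the word, and adding { 0,1, 0,1, ... } produces the
     pair { 2*i, 2*i+1 }.  */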
  /* The actual shuffle operations all operate on V16QImode.  */
  op0 = gen_lowpart (V16QImode, op0);
  op1 = gen_lowpart (V16QImode, op1);
  target = gen_lowpart (V16QImode, target);

  if (TARGET_XOP)
    emit_insn (gen_xop_pperm (target, op0, op1, mask));
  else if (one_operand_shuffle)
    emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
  else
    {
      rtx xops[6];
      bool ok;

      /* Shuffle the two input vectors independently.  */
      t1 = gen_reg_rtx (V16QImode);
      t2 = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
      emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));

 merge_two:
      /* Then merge them together.  The key is whether any given control
	 element contained a bit set that indicates the second word.  */
      mask = operands[3];
      vt = GEN_INT (w);
      if (maskmode == V2DImode && !TARGET_SSE4_1)
	{
	  /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
	     more shuffle to convert the V2DI input mask into a V4SI
	     input mask.  At which point the masking that expand_int_vcond
	     does will work as desired.  */
	  rtx t3 = gen_reg_rtx (V4SImode);
	  emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
					const0_rtx, const0_rtx,
					const2_rtx, const2_rtx));
	  mask = t3;
	  maskmode = V4SImode;
	  e = w = 4;
	}

      for (i = 0; i < w; i++)
	vec[i] = vt;
      vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
      vt = force_reg (maskmode, vt);
      mask = expand_simple_binop (maskmode, AND, mask, vt,
				  NULL_RTX, 0, OPTAB_DIRECT);

      xops[0] = gen_lowpart (mode, operands[0]);
      xops[1] = gen_lowpart (mode, t2);
      xops[2] = gen_lowpart (mode, t1);
      xops[3] = gen_rtx_EQ (maskmode, mask, vt);
      xops[4] = mask;
      xops[5] = vt;
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
    }
}
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (operands[1]);
  rtx tmp, dest;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);
      rtx (*extract)(rtx, rtx) = NULL;
      enum machine_mode halfmode = BLKmode;

      switch (imode)
	{
	case V32QImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv16qiv16hi2;
	  else
	    unpack = gen_avx2_sign_extendv16qiv16hi2;
	  halfmode = V16QImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
	  break;
	case V16HImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv8hiv8si2;
	  else
	    unpack = gen_avx2_sign_extendv8hiv8si2;
	  halfmode = V8HImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
	  break;
	case V8SImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv4siv4di2;
	  else
	    unpack = gen_avx2_sign_extendv4siv4di2;
	  halfmode = V4SImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
	  break;
	case V16QImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv8qiv8hi2;
	  else
	    unpack = gen_sse4_1_sign_extendv8qiv8hi2;
	  break;
	case V8HImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv4hiv4si2;
	  else
	    unpack = gen_sse4_1_sign_extendv4hiv4si2;
	  break;
	case V4SImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv2siv2di2;
	  else
	    unpack = gen_sse4_1_sign_extendv2siv2di2;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (GET_MODE_SIZE (imode) == 32)
	{
	  tmp = gen_reg_rtx (halfmode);
	  emit_insn (extract (tmp, operands[1]));
	}
      else if (high_p)
	{
	  /* Shift higher 8 bytes to lower 8 bytes.  */
	  tmp = gen_reg_rtx (imode);
	  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
					 gen_lowpart (V1TImode, operands[1]),
					 GEN_INT (64)));
	}
      else
	tmp = operands[1];

      emit_insn (unpack (operands[0], tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);

      switch (imode)
	{
	case V16QImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv16qi;
	  else
	    unpack = gen_vec_interleave_lowv16qi;
	  break;
	case V8HImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv8hi;
	  else
	    unpack = gen_vec_interleave_lowv8hi;
	  break;
	case V4SImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv4si;
	  else
	    unpack = gen_vec_interleave_lowv4si;
	  break;
	default:
	  gcc_unreachable ();
	}

      dest = gen_lowpart (imode, operands[0]);

      if (unsigned_p)
	tmp = force_reg (imode, CONST0_RTX (imode));
      else
	tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
				   operands[1], pc_rtx, pc_rtx);

      emit_insn (unpack (dest, operands[1], tmp));
    }
}
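/* For illustration (editorial, not original code): in the pre-SSE4.1
   path the sign extension interleaves the input with a mask of its
   sign bits.  tmp = (0 > op1) yields all-ones in negative elements,
   so e.g. interleaving the low words of { -2, 3, ... } with
   { 0xffff, 0x0000, ... } produces the V4SImode values { -2, 3 }.  */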
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
	{
	case QImode:
	  insn = gen_subqi3_carry;
	  break;
	case HImode:
	  insn = gen_subhi3_carry;
	  break;
	case SImode:
	  insn = gen_subsi3_carry;
	  break;
	case DImode:
	  insn = gen_subdi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode:
	  insn = gen_addqi3_carry;
	  break;
	case HImode:
	  insn = gen_addhi3_carry;
	  break;
	case SImode:
	  insn = gen_addsi3_carry;
	  break;
	case DImode:
	  insn = gen_adddi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
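/* For illustration (an editorial sketch, not original code): for
   'x += (a <u b)' in SImode the expansion boils down to roughly

       cmpl  %ebx, %eax        ; sets CF iff a <u b
       adcl  $0, %ecx          ; x += carry

   and the mirror case uses sbb; VAL of -1 together with the reversed
   condition covers the remaining combinations.  The mnemonics are a
   sketch only.  */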
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating point parameters and nonoffsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, word_mode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
	{
	  int i;

	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      for (i = 0; i < size; i++)
		parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      for (i = 1; i < size; i++)
		parts[i] = adjust_address (operand, SImode, 4 * i);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case TFmode:
		  real_to_target (l, &r, mode);
		  parts[3] = gen_int_mode (l[3], SImode);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  gcc_unreachable ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      if (mode == TImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  return size;
}
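/* For illustration (editorial, not original code): on ia32 a DFmode
   constant such as 1.0 is split into two SImode immediates taken from
   its target image, here l[0] = 0x00000000 and l[1] = 0x3ff00000,
   which the caller then moves independently.  */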
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], word_mode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
		|| offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
	  && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
	src_base = plus_constant (src_base, 4);

      /* src_base refers to the stack pointer and is
	 automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
	part[1][i] = change_address (part[1][i],
				     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      rtx tmp;

      for (i = 0; i < nparts; i++)
	{
	  collisionparts[i]
	    = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
	  if (collisionparts[i])
	    collisions++;
	}

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts[1])
	{
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}
      else if (collisions == 1
	       && nparts == 4
	       && (collisionparts[1] || collisionparts[2]))
	{
	  if (collisionparts[1])
	    {
	      tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	      tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	    }
	  else
	    {
	      tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
	      tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
	    }
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  for (i = 1; i < nparts; i++)
	    {
	      tmp = plus_constant (base, UNITS_PER_WORD * i);
	      part[1][i] = replace_equiv_address (part[1][i], tmp);
	    }
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (ix86_gen_add3 (stack_pointer_rtx,
					  stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	  else if (nparts == 4)
	    {
	      emit_move_insn (part[0][3], part[1][3]);
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these come from an attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		{
		case MEM:
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  break;

		case REG:
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  break;

		default:
		  gcc_unreachable ();
		}

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))
	   || (nparts == 4
	       && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy in the reverse order.  */
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
	{
	  operands[2 + i] = part[0][j];
	  operands[6 + i] = part[1][j];
	}
    }
  else
    {
      for (i = 0; i < nparts; i++)
	{
	  operands[2 + i] = part[0][i];
	  operands[6 + i] = part[1][i];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
	if (CONST_INT_P (operands[6 + j])
	    && operands[6 + j] != const0_rtx
	    && REG_P (operands[2 + j]))
	  for (i = j; i < nparts - 1; i++)
	    if (CONST_INT_P (operands[7 + i])
		&& INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
	      operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);
}
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
	  && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
	emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
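/* For illustration (editorial, not original code): the seemingly
   inverted selection 'mode == DImode ? gen_addsi3 : gen_adddi3' is
   deliberate.  MODE is the double-word mode being split, so each half
   is SImode when MODE is DImode (and DImode when MODE is TImode).
   E.g. a DImode shift left by 1 on ia32 ends up as 'addl' of a half
   register with itself.  */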
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > half_width)
	    ix86_expand_ashl_const (high[0], count - half_width, mode);
	}
      else
	{
	  gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashl_const (low[0], count, mode);
	}
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode capable registers, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));

	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
	}

      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5/6, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  enum machine_mode half_mode;
	  rtx (*gen_lshr3)(rtx, rtx, rtx);
	  rtx (*gen_and3)(rtx, rtx, rtx);
	  rtx (*gen_xor3)(rtx, rtx, rtx);
	  HOST_WIDE_INT bits;
	  rtx x;

	  if (mode == DImode)
	    {
	      half_mode = SImode;
	      gen_lshr3 = gen_lshrsi3;
	      gen_and3 = gen_andsi3;
	      gen_xor3 = gen_xorsi3;
	      bits = 5;
	    }
	  else
	    {
	      half_mode = DImode;
	      gen_lshr3 = gen_lshrdi3;
	      gen_and3 = gen_anddi3;
	      gen_xor3 = gen_xordi3;
	      bits = 6;
	    }

	  if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
	    x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
	  else
	    x = gen_lowpart (half_mode, operands[2]);
	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
	  emit_insn (gen_and3 (high[0], high[0], const1_rtx));
	  emit_move_insn (low[0], high[0]);
	  emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
	}

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
	{
	  emit_move_insn (high[0], high[1]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));
	  emit_move_insn (low[0], high[0]);
	}
      else if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));

	  if (count > half_width)
	    emit_insn (gen_ashr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashr3 (scratch, scratch,
				GEN_INT (half_width - 1)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

	  emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
	}
    }
}
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > half_width)
	    emit_insn (gen_lshr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  ix86_expand_clear (scratch);
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

	  emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
	}
    }
}
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}
/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If true, jump to the label.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}
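/* For illustration (an editorial sketch, not original code):
   ix86_expand_aligntest (ptr, 4, ...) emits roughly

       movl  %ptr, %tmp
       andl  $4, %tmp
       jz    .Llabel

   i.e. it tests VARIABLE & VALUE and branches when the result is zero;
   the REG_BR_PROB note biases prediction (90% taken in prologues, 50%
   in epilogues).  */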
/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  if (GET_MODE (exp) != Pmode)
    exp = convert_to_mode (Pmode, exp, 1);
  return force_reg (Pmode, exp);
}
/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
			    GEN_INT (exact_log2 (scale)),
			    NULL, 1, OPTAB_DIRECT);
  return sc;
}

/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */

static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
/* When SRCPTR is non-NULL, output simple loop to move memory
   pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
   overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
   equivalent loop to set memory by VALUE (supposed to be in MODE).

   The size is rounded down to whole number of chunk size moved at once.
   SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr, rtx value,
			       rtx count, enum machine_mode mode, int unroll,
			       int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
			      NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
			       true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using single temporary.
	 Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
	{
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, srcmem);
	    }
	}
      else
	{
	  rtx tmpreg[4];
	  gcc_assert (unroll <= 4);
	  for (i = 0; i < unroll; i++)
	    {
	      tmpreg[i] = gen_reg_rtx (mode);
	      if (i)
		srcmem =
		  adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
	      emit_move_insn (tmpreg[i], srcmem);
	    }
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		destmem =
		  adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	      emit_move_insn (destmem, tmpreg[i]);
	    }
	}
    }
  else
    for (i = 0; i < unroll; i++)
      {
	if (i)
	  destmem =
	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
			   true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
	predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
	predict_jump (REG_BR_PROB_BASE - 1);
      else
	predict_jump (REG_BR_PROB_BASE
		      - (REG_BR_PROB_BASE + expected_size / 2)
			/ expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
				 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
	emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
/* Output "rep; mov" instruction.
   Arguments have same meaning as for previous function.  */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
			   rtx destptr, rtx srcptr,
			   rtx count,
			   enum machine_mode mode)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  /* If the size is known, it is shorter to use rep movs.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
						       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
			       GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
    }
  else
    {
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
    }
  if (CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      srcmem = shallow_copy_rtx (srcmem);
      set_mem_size (destmem, rounded_count);
      set_mem_size (srcmem, rounded_count);
    }
  else
    {
      if (MEM_SIZE_KNOWN_P (destmem))
	clear_mem_size (destmem);
      if (MEM_SIZE_KNOWN_P (srcmem))
	clear_mem_size (srcmem);
    }
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
			  destexp, srcexp));
}
/* Output "rep; stos" instruction.
   Arguments have same meaning as for previous function.  */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
			    rtx count, enum machine_mode mode,
			    rtx orig_value)
{
  rtx destexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  value = force_reg (mode, gen_lowpart (mode, value));
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
						       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if (orig_value == const0_rtx && CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, rounded_count);
    }
  else if (MEM_SIZE_KNOWN_P (destmem))
    clear_mem_size (destmem);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
static void
emit_strmov (rtx destmem, rtx srcmem,
	     rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode,
			   offset + 8);
	    }
	  else
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
	  else
	    {
	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode,
			   offset + 4);
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
	  offset += 1;
	}
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count,
				   GEN_INT (max_size - 1),
				   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
				     count, QImode, 1, 4);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.  */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  src = change_address (srcmem, HImode, srcptr);
	  dest = change_address (destmem, HImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  src = change_address (srcmem, QImode, srcptr);
	  dest = change_address (destmem, QImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, HImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, HImode, tmp);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, QImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, QImode, tmp);
	  emit_move_insn (dest, src);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
}
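/* For illustration (editorial, not original code): for a known count,
   the epilogue above simply decomposes count & 15 into its bits; e.g.
   on a 64-bit target countval = 13 (0b1101) emits one DImode, one
   SImode and one QImode move at offsets 0, 8 and 12.  */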
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
				 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
			 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
				 gen_lowpart (QImode, value), count, QImode,
				 1, max_size / 2);
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count,
			int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr,
						   offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr,
						   offset + 8);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr,
						   offset);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    {
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr,
						   offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr,
						   offset + 4);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  dest = adjust_automodify_address_nv (destmem, SImode, destptr,
					       offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  dest = adjust_automodify_address_nv (destmem, HImode, destptr,
					       offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  dest = adjust_automodify_address_nv (destmem, QImode, destptr,
					       offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
	  offset += 1;
	}
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count,
				       max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Copy enough from DST to SRC to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
				 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx orig_dst = dst;
  rtx orig_src = src;
  int off = 0;
  int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
	  && (src_align_bytes & 1) == (align_bytes & 1)
	  && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
	set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
	{
	  unsigned int src_align = 0;
	  if ((src_align_bytes & 3) == (align_bytes & 3))
	    src_align = 4;
	  else if ((src_align_bytes & 1) == (align_bytes & 1))
	    src_align = 2;
	  if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	    set_mem_align (src, src_align * BITS_PER_UNIT);
	}
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
	src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
	src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
	src_align = 2;
      if (src_align > (unsigned int) desired_align)
	src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  if (MEM_SIZE_KNOWN_P (orig_src))
    set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
  *srcp = src;
  return dst;
}
/* Set enough from DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Set enough from DST to align DST, known to be aligned by ALIGN, to
   DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
				 int desired_align, int align_bytes)
{
  int off = 0;
  rtx orig_dst = dst;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  return dst;
}
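
/* For instance, ALIGN_BYTES == 7 yields a QImode, a HImode and an SImode
   store; together they advance the destination by exactly 1 + 2 + 4 = 7
   bytes, after which DST carries the DESIRED_ALIGN alignment mark.  */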
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
	    int *dynamic_check)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
			     || (memset
				 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable			\
			   || (alg != rep_prefix_1_byte		\
			       && alg != rep_prefix_4_byte	\
			       && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
	  && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
	return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
	return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop, REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  /* We get here if the algorithms that were not libcall-based
	     were rep-prefix based and we are unable to use rep prefixes
	     based on global register usage.  Break out of the loop and
	     use the heuristic below.  */
	  if (algs->size[i].max == 0)
	    break;
	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
	    {
	      enum stringop_alg candidate = algs->size[i].alg;

	      if (candidate != libcall && ALG_USABLE_P (candidate))
		alg = candidate;
	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
		 last non-libcall inline algorithm.  */
	      if (TARGET_INLINE_ALL_STRINGOPS)
		{
		  /* When the current size is best to be copied by a libcall,
		     but we are still forced to inline, run the heuristic below
		     that will pick code for medium sized blocks.  */
		  if (alg != libcall)
		    return alg;
		  break;
		}
	      else if (ALG_USABLE_P (candidate))
		return candidate;
	    }
	}
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most of that size guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  enum stringop_alg candidate = algs->size[i].alg;
	  any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

	  if (candidate != libcall && candidate
	      && ALG_USABLE_P (candidate))
	    max = algs->size[i].max;
	}
      /* If there aren't any usable algorithms, then recursing on
	 smaller sizes isn't going to find anything.  Just return the
	 simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
	{
	  /* Pick something reasonable.  */
	  if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	    *dynamic_check = 128;
	  return loop_1_byte;
	}
      if (max == -1)
	max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	*dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
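
/* Illustrative sketch (hypothetical table, kept out of the build): the
   stringop_algs descriptors consulted above have the shape

     static const struct stringop_algs example_memcpy
       = {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}};

   With expected_size == 100, the size loop stops at the first entry whose
   max (256) covers the size and returns rep_prefix_4_byte, provided the
   rep-prefix registers have not been fixed by the user.  The real tables
   live in the processor_costs structures.  */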
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
static int
decide_alignment (int align,
		  enum stringop_alg alg,
		  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
    case no_stringop:
      gcc_unreachable ();
    case loop:
    case unrolled_loop:
      desired_align = GET_MODE_SIZE (Pmode);
      break;
    case rep_prefix_8_byte:
      desired_align = 8;
      break;
    case rep_prefix_4_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks,
	 copying the whole cache line at once.  */
      if (TARGET_PENTIUMPRO)
	desired_align = 8;
      else
	desired_align = 4;
      break;
    case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks,
	 copying the whole cache line at once.  */
      if (TARGET_PENTIUMPRO)
	desired_align = 8;
      else
	desired_align = 1;
      break;
    case loop_1_byte:
      desired_align = 1;
      break;
    case libcall:
      return 0;
    }

  if (optimize_size)
    desired_align = 1;
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
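
/* Usage note: smallest_pow2_greater_than (6) == 8 and
   smallest_pow2_greater_than (8) == 16; the result is strictly greater
   than VAL, which the epilogue sizing below relies on when masking the
   residual count with a power-of-two - 1 value.  */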
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: Conditional that jumps up to epilogues for small
      blocks that can be handled by epilogue alone.  This is faster
      but also needed for correctness, since the prologue assumes the
      block is larger than the desired alignment.

      Optional dynamic check for size and libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy first few bytes in order to get destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power of two sized
      blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with specified algorithm.

   4) Epilogue: code copying tail of the block that is too small to be
      handled by main body (or up to size guarded by prologue guard).  */

bool
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access on reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care here
     just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, false, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));
  srcreg = copy_addr_to_reg (XEXP (src, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode) * (TARGET_64BIT ? 4 : 2);
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }

  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size < epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }

  /* Emit code to decide on runtime whether library call or inline should be
     used.  */
  if (dynamic_check != -1)
    {
      if (CONST_INT_P (count_exp))
	{
	  if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
	    {
	      emit_block_move_via_libcall (dst, src, count_exp, false);
	      count_exp = const0_rtx;
	      goto epilogue;
	    }
	}
      else
	{
	  rtx hot_label = gen_label_rtx ();
	  jump_around_label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
				   LEU, 0, GET_MODE (count_exp), 1, hot_label);
	  predict_jump (REG_BR_PROB_BASE * 90 / 100);
	  emit_block_move_via_libcall (dst, src, count_exp, false);
	  emit_jump (jump_around_label);
	  emit_label (hot_label);
	}
    }

  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  src = change_address (src, BLKmode, srcreg);
	  dst = change_address (dst, BLKmode, destreg);
	  expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
						 desired_align, align_bytes);
	  count_exp = plus_constant (count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, word_mode, 1, expected_size);
      break;
    case unrolled_loop:
      /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
	 registers for 4 temporaries anyway.  */
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, word_mode, TARGET_64BIT ? 4 : 2,
				     expected_size);
      break;
    case rep_prefix_8_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 DImode);
      break;
    case rep_prefix_4_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 SImode);
      break;
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 QImode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
					  (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					  (count / size_needed) * size_needed);
    }
  else
    {
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
	 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
	 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
	 bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
			    epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
/* Helper function for memcpy.  For QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x10101010, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
	v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
	 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
				  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
	{
	  if (mode == SImode)
	    emit_insn (gen_movsi_insv_1 (reg, reg));
	  else
	    emit_insn (gen_movdi_insv_1 (reg, reg));
	}
      else
	{
	  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
				     NULL, 1, OPTAB_DIRECT);
	  reg =
	    expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
	}
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
	return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}
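
/* Worked example: for VAL == 0x2A the constant path computes v = 0x2A,
   then v |= v << 8 gives 0x2A2A, v |= v << 16 gives 0x2A2A2A2A, and for
   DImode v |= (v << 16) << 16 gives 0x2A2A2A2A2A2A2A2A -- the same value
   a multiply by 0x0101010101010101 would produce.  */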
/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
   be needed by main loop copying SIZE_NEEDED chunks and prologue getting
   alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
				int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  See expand_movmem comment for explanation of individual
   steps performed.  */
bool
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access on reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode) * 4;
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }
  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	{
	  enum machine_mode mode = SImode;
	  if (TARGET_64BIT && (count & ~0xffffffff))
	    mode = DImode;
	  count_exp = force_reg (mode, count_exp);
	}
    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all code).  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      /* To improve performance of small blocks, we jump around the VAL
	 promoting mode.  This means that if the promoted VAL is not constant,
	 we might not use it in the epilogue and have to use byte
	 loop variant.  */
      if (epilogue_size_needed > 2 && !promoted_val)
	force_loopy_epilogue = true;
      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size <= epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
			       LEU, 0, counter_mode (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }

  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  dst = change_address (dst, BLKmode, destreg);
	  expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
						 desired_align, align_bytes);
	  count_exp = plus_constant (count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      promoted_val = val_exp;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, word_mode, 1, expected_size);
      break;
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, word_mode, 4, expected_size);
      break;
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  DImode);
      break;
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  SImode);
      break;
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  QImode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					(count / size_needed) * size_needed);
  else
    dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to copy the remaining bytes.  */

 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
	 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
	 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
	 bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
	expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
					 epilogue_size_needed);
      else
	expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
				epilogue_size_needed);
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if it is aligned to 4 bytes.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  emit_insn (ix86_gen_add3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It only makes programs bigger and does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes?  */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (ix86_gen_add3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));

  emit_label (end_0_label);
}
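
/* Illustrative sketch (kept out of the build): the zero-byte test emitted
   above, restated as host C on a 32-bit value.  The RTL sequence leaves
   (x - 0x01010101) & ~x & 0x80808080 in TMPREG; the result is nonzero
   exactly when some byte of X is zero, e.g. 0x61006263 yields a nonzero
   mask while 0x61626364 yields zero.  */
#if 0
static int
example_has_zero_byte (unsigned int x)
{
  return ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
}
#endif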
/* Expand strlen.  */

bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return false;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_insn_for_size_p ())
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      emit_insn (ix86_gen_sub3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
	return false;

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
						 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
    }
  return true;
}
/* For given symbol (function) construct code to compute address of its PLT
   entry in large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp, unspec;

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC);
  gcc_assert (Pmode == DImode);

  tmp = gen_reg_rtx (Pmode);
  unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2,
		  rtx pop, bool sibcall)
{
  /* We need to represent that SI and DI registers are clobbered
     by SYSV calls.  */
  static int clobbered_registers[] = {
	XMM6_REG, XMM7_REG, XMM8_REG,
	XMM9_REG, XMM10_REG, XMM11_REG,
	XMM12_REG, XMM13_REG, XMM14_REG,
	XMM15_REG, SI_REG, DI_REG
  };
  rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
  rtx use = NULL, call;
  unsigned int vec_len;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
	fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
	use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (ix86_cmodel == CM_LARGE_PIC
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
	   ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
	   : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
    {
      fnaddr = XEXP (fnaddr, 0);
      if (GET_MODE (fnaddr) != word_mode)
	fnaddr = convert_to_mode (word_mode, fnaddr, 1);
      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
    }

  vec_len = 0;
  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  vec[vec_len++] = call;

  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      vec[vec_len++] = pop;
    }

  if (TARGET_64BIT_MS_ABI
      && (!callarg2 || INTVAL (callarg2) != -2))
    {
      unsigned i;

      vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
				       UNSPEC_MS_TO_SYSV_CALL);

      for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
	vec[vec_len++]
	  = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
			     ? TImode : DImode,
			     gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
					  ? TImode : DImode,
					  clobbered_registers[i]));
    }

  /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration.  */
  if (TARGET_VZEROUPPER)
    {
      int avx256;
      if (cfun->machine->callee_pass_avx256_p)
	{
	  if (cfun->machine->callee_return_avx256_p)
	    avx256 = callee_return_pass_avx256;
	  else
	    avx256 = callee_pass_avx256;
	}
      else if (cfun->machine->callee_return_avx256_p)
	avx256 = callee_return_avx256;
      else
	avx256 = call_no_avx256;

      if (reload_completed)
	emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
      else
	vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
					 gen_rtvec (1, GEN_INT (avx256)),
					 UNSPEC_CALL_NEEDS_VZEROUPPER);
    }

  if (vec_len > 1)
    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;

  return call;
}
void
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
{
  rtx pat = PATTERN (insn);
  rtvec vec = XVEC (pat, 0);
  int len = GET_NUM_ELEM (vec) - 1;

  /* Strip off the last entry of the parallel.  */
  gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
  gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
  if (len == 1)
    pat = RTVEC_ELT (vec, 0);
  else
    pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));

  emit_insn (gen_avx_vzeroupper (vzeroupper));
  emit_call_insn (pat);
}
/* Output the assembly for a call instruction.  */

const char *
ix86_output_call_insn (rtx insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      if (direct_p)
	xasm = "jmp\t%P0";
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "rex.W jmp %A0";
      else
	xasm = "jmp\t%A0";

      output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx i;

      for (i = NEXT_INSN (insn); i; i = NEXT_INSN (i))
	{
	  /* If we get to another real insn, we don't need the nop.  */
	  if (INSN_P (i))
	    break;

	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  If non-
	     call-exceptions, we'll have done this during epilogue emission.  */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}

      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  if (direct_p)
    xasm = "call\t%P0";
  else
    xasm = "call\t%A0";

  output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared_machine_function ();
  f->use_fast_prologue_epilogue_nregs = -1;
  f->tls_descriptor_call_expanded_p = 0;
  f->call_abi = ix86_abi;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Virtual slot is valid only before vregs are instantiated.  */
  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (s->rtl);
}
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  /* Add length of addr32 prefix.  */
  len = (GET_CODE (addr) == ZERO_EXTEND
	 || GET_CODE (addr) == AND);

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (REG_P (addr)
	  && (addr == arg_pointer_rtx
	      || addr == frame_pointer_rtx
	      || REGNO (addr) == SP_REG
	      || REGNO (addr) == BP_REG
	      || REGNO (addr) == R12_REG
	      || REGNO (addr) == R13_REG))
	len += 1;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (TARGET_64BIT)
	{
	  rtx symbol = disp;

	  if (GET_CODE (disp) == CONST)
	    symbol = XEXP (disp, 0);
	  if (GET_CODE (symbol) == PLUS
	      && CONST_INT_P (XEXP (symbol, 1)))
	    symbol = XEXP (symbol, 0);

	  if (GET_CODE (symbol) != LABEL_REF
	      && (GET_CODE (symbol) != SYMBOL_REF
		  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
	      && (GET_CODE (symbol) != UNSPEC
		  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
		      && XINT (symbol, 1) != UNSPEC_PCREL
		      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
	    len += 1;
	}
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (base && satisfies_constraint_K (disp))
	    len += 1;
	  else
	    len += 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && REG_P (base)
	       && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len += 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && REG_P (base)
	      && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len += 1;
    }

  return len;
}
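
/* Encoding refresher: mod 00 with r/m 101 means bare disp32 (or
   disp32(%rip) in 64-bit mode), so (%ebp) must be encoded as disp8 of 0,
   and r/m 100 always selects a SIB byte, so (%esp) pays for one -- which
   is why the rules of thumb above single out ebp/r13 and esp/r12.  */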
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded as 32bit sign
	     extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
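
/* Example: "add $3, %eax" in MODE_SI qualifies for the short form, since
   3 fits in -128..127, so its immediate costs 1 byte; "add $300, %eax"
   does not and pays the full 4 bytes.  */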
/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);
      if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
	{
	  if (GET_CODE (addr) == ZERO_EXTEND)
	    addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == SUBREG)
	    addr = SUBREG_REG (addr);
	}

      return memory_address_length (addr);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
	constrain_operands_cached (reload_completed);
	if (which_alternative != -1)
	  {
	    const char *constraints = recog_data.constraints[i];
	    int alt = which_alternative;

	    while (*constraints == '=' || *constraints == '+')
	      constraints++;
	    while (alt-- > 0)
	      while (*constraints++ != ',')
		;
	    /* Skip ignored operands.  */
	    if (*constraints == 'X')
	      continue;
	  }
	return memory_address_length (XEXP (recog_data.operand[i], 0));
      }
  return 0;
}
/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
{
  int i;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;
      }
    else
      {
	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
	if (MEM_P (recog_data.operand[i])
	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
	  return 3 + 1;
      }

  return 2 + 1;
}
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_ATOM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_NOCONA:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static bool
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */

bool
ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
  int i;
  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
	rtx addr = XEXP (recog_data.operand[i], 0);
	return modified_in_p (addr, set_insn) != 0;
      }
  return false;
}
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (insn_type == TYPE_LEA)
	{
	  rtx addr = PATTERN (insn);

	  if (GET_CODE (addr) == PARALLEL)
	    addr = XVECEXP (addr, 0, 0);

	  gcc_assert (GET_CODE (addr) == SET);

	  addr = SET_SRC (addr);
	  if (modified_in_p (addr, dep_insn))
	    cost += 1;
	}
      else if (ix86_agi_dependent (dep_insn, insn))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependent (dep_insn, insn))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && MEM_P (SET_DEST (set2)))
	cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
    case PROCESSOR_ATOM:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost = 3;

	  /* Because of the difference between the length of integer and
	     floating unit pipeline preparation stages, the memory operands
	     for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}
      break;

    default:
      break;
    }

  return cost;
}
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_K6:
      return 1;

    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      /* Generally, we want haifa-sched:max_issue() to look ahead as far
         as the number of instructions that can be executed in one cycle,
         i.e., issue_rate.  I wonder why tuning for many CPUs does not do
         this.  */
      return ix86_issue_rate ();

    default:
      return 0;
    }
}
/* Model decoder of Core 2/i7.
   Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */

/* Maximum length of an insn that can be handled by
   a secondary decoder unit.  '8' for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
   '16' for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Maximum number of instructions decoder can handle per cycle.
   '6' for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;
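/* Illustrative sketch (not part of the compiler proper, hence kept under
   #if 0): the admission test that the hooks below implement, written as
   a standalone predicate using the Core 2/i7 parameters above.  */
#if 0
static bool
core2i7_example_decoder_admits (int block_len, int block_n_insns,
                                int insn_size, bool first_cycle_insn_p)
{
  /* Past the first insn of a cycle only a secondary decoder is free,
     and it cannot take a long (9+ byte) insn.  */
  if (!first_cycle_insn_p && insn_size > 8)
    return false;
  /* The insn must fit into the 16-byte ifetch block ...  */
  if (block_len + insn_size > 16)
    return false;
  /* ... and one of the (at most 6) decoder slots must remain.  */
  if (block_n_insns + 1 > 6)
    return false;
  return true;
}
#endif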
typedef struct ix86_first_cycle_multipass_data_ *
  ix86_first_cycle_multipass_data_t;
typedef const struct ix86_first_cycle_multipass_data_ *
  const_ix86_first_cycle_multipass_data_t;

/* A variable to store target state across calls to max_issue within
   one cycle.  */
static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
  *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;

/* Initialize DATA.  */
static void
core2i7_first_cycle_multipass_init (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
  data->ready_try_change = NULL;
  data->ready_try_change_size = 0;
}
/* Advancing the cycle; reset ifetch block counts.  */
static void
core2i7_dfa_post_advance_cycle (void)
{
  ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;

  gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
}
static int min_insn_size (rtx);

/* Filter out insns from ready_try that the core will not be able to issue
   on the current cycle due to decoder restrictions.  */
static void
core2i7_first_cycle_multipass_filter_ready_try
(const_ix86_first_cycle_multipass_data_t data,
 char *ready_try, int n_ready, bool first_cycle_insn_p)
{
  while (n_ready--)
    {
      rtx insn;
      int insn_size;

      if (ready_try[n_ready])
        continue;

      insn = get_ready_element (n_ready);
      insn_size = min_insn_size (insn);

      if (/* If this is too long an insn for a secondary decoder ...  */
          (!first_cycle_insn_p
           && insn_size > core2i7_secondary_decoder_max_insn_size)
          /* ... or it would not fit into the ifetch block ...  */
          || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
          /* ... or the decoder is full already ...  */
          || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
        /* ... mask the insn out.  */
        {
          ready_try[n_ready] = 1;

          if (data->ready_try_change)
            SET_BIT (data->ready_try_change, n_ready);
        }
    }
}
/* Prepare for a new round of multipass lookahead scheduling.  */
static void
core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
                                     bool first_cycle_insn_p)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = ix86_first_cycle_multipass_data;

  /* Restore the state from the end of the previous round.  */
  data->ifetch_block_len = prev_data->ifetch_block_len;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;

  /* Filter instructions that cannot be issued on current cycle due to
     decoder restrictions.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
                                                  first_cycle_insn_p);
}
/* INSN is being issued in current solution.  Account for its impact on
   the decoder model.  */
static void
core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
                                     rtx insn, const void *_prev_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = (const_ix86_first_cycle_multipass_data_t) _prev_data;

  int insn_size = min_insn_size (insn);

  data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
  gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
              && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  /* Allocate or resize the bitmap for storing INSN's effect on ready_try.  */
  if (!data->ready_try_change)
    {
      data->ready_try_change = sbitmap_alloc (n_ready);
      data->ready_try_change_size = n_ready;
    }
  else if (data->ready_try_change_size < n_ready)
    {
      data->ready_try_change = sbitmap_resize (data->ready_try_change,
                                               n_ready, 0);
      data->ready_try_change_size = n_ready;
    }
  sbitmap_zero (data->ready_try_change);

  /* Filter out insns from ready_try that the core will not be able to issue
     on current cycle due to decoder.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
                                                  false);
}
/* Revert the effect on ready_try.  */
static void
core2i7_first_cycle_multipass_backtrack (const void *_data,
                                         char *ready_try,
                                         int n_ready ATTRIBUTE_UNUSED)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  unsigned int i = 0;
  sbitmap_iterator sbi;

  gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
  EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
    {
      ready_try[i] = 0;
    }
}
/* Save the result of multipass lookahead scheduling for the next round.  */
static void
core2i7_first_cycle_multipass_end (const void *_data)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  ix86_first_cycle_multipass_data_t next_data
    = ix86_first_cycle_multipass_data;

  if (data != NULL)
    {
      next_data->ifetch_block_len = data->ifetch_block_len;
      next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
    }
}
/* Deallocate target data.  */
static void
core2i7_first_cycle_multipass_fini (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  if (data->ready_try_change)
    {
      sbitmap_free (data->ready_try_change);
      data->ready_try_change = NULL;
      data->ready_try_change_size = 0;
    }
}
/* Prepare for scheduling pass.  */
static void
ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
                        int verbose ATTRIBUTE_UNUSED,
                        int max_uid ATTRIBUTE_UNUSED)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      targetm.sched.dfa_post_advance_cycle
        = core2i7_dfa_post_advance_cycle;
      targetm.sched.first_cycle_multipass_init
        = core2i7_first_cycle_multipass_init;
      targetm.sched.first_cycle_multipass_begin
        = core2i7_first_cycle_multipass_begin;
      targetm.sched.first_cycle_multipass_issue
        = core2i7_first_cycle_multipass_issue;
      targetm.sched.first_cycle_multipass_backtrack
        = core2i7_first_cycle_multipass_backtrack;
      targetm.sched.first_cycle_multipass_end
        = core2i7_first_cycle_multipass_end;
      targetm.sched.first_cycle_multipass_fini
        = core2i7_first_cycle_multipass_fini;

      /* Set decoder parameters.  */
      core2i7_secondary_decoder_max_insn_size = 8;
      core2i7_ifetch_block_size = 16;
      core2i7_ifetch_block_max_insns = 6;
      break;

    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
        return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
        return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
           && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
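/* For example (illustration, not from the original source): a DFmode
   constant such as 1.0 is given 64-bit alignment here even though ia32
   requires only 32 bits for double, and a string constant of length 31
   or more is word-aligned so that word-sized block copies of it stay
   aligned.  */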
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
          || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if ((TYPE_MODE (type) == XCmode
           || TYPE_MODE (type) == TCmode) && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
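/* For example (illustration, not from the original source): with -O2 a
   static "char buf[64]" is an aggregate of at least max_align bits, so
   the first test above raises its alignment to 256 bits, keeping
   aligned SSE/AVX accesses to it valid; with -Os the cap drops back to
   BITS_PER_WORD.  */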
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this function is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
                      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
        align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  The exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions on arrays.  This
     rule is meant for static storage (where the compiler cannot do the
     analysis by itself).  We follow it for automatic variables only when
     convenient.  We fully control everything in the function being compiled,
     and functions from other units cannot rely on the alignment.

     Exclude the va_list type.  It is the common case of a local array where
     we cannot benefit from the alignment.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
          && (va_list_type_node == NULL_TREE
              || (TYPE_MAIN_VARIANT (type)
                  != TYPE_MAIN_VARIANT (va_list_type_node)))
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if ((TYPE_MODE (type) == XCmode
           || TYPE_MODE (type) == TCmode) && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
                        unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      fntype = TREE_TYPE (fndecl);
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
        {
          /* Fastcall functions use ecx/edx for arguments, which leaves
             us with EAX for the static chain.
             Thiscall functions use ecx for arguments, which also
             leaves us with EAX for the static chain.  */
          regno = AX_REG;
        }
      else if (ix86_function_regparm (fntype, fndecl) == 3)
        {
          /* For regparm 3, we have no free call-clobbered registers in
             which to store the static chain.  In order to implement this,
             we have the trampoline push the static chain to the stack.
             However, we can't push a value below the return address when
             we call the nested function directly, so we have to use an
             alternate entry point.  For this we use ESI, and have the
             alternate entry point push ESI, so that things appear the
             same once we're executing the nested function.  */
          if (incoming_p)
            {
              if (fndecl == current_function_decl)
                ix86_static_chain_on_stack = true;
              return gen_frame_mem (SImode,
                                    plus_constant (arg_pointer_rtx, -8));
            }
          regno = SI_REG;
        }
    }

  return gen_rtx_REG (Pmode, regno);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load address using
         the shorter movl instead of movabs.  We may want to support
         movq for kernel mode, but kernel does not use trampolines at
         the moment.  FNADDR is a 32bit address and may not be in
         DImode when ptr_mode == SImode.  Always use movl in this
         case.  */
      if (ptr_mode == SImode
          || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
        {
          fnaddr = copy_addr_to_reg (fnaddr);

          mem = adjust_address (m_tramp, HImode, offset);
          emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

          mem = adjust_address (m_tramp, SImode, offset + 2);
          emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
          offset += 6;
        }
      else
        {
          mem = adjust_address (m_tramp, HImode, offset);
          emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

          mem = adjust_address (m_tramp, DImode, offset + 2);
          emit_move_insn (mem, fnaddr);
          offset += 10;
        }

      /* Load static chain using movabs to r10.  Use the shorter movl
         instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
        {
          opcode = 0xba41;
          size = 6;
        }
      else
        {
          opcode = 0xba49;
          size = 10;
        }

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
         pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
         with a constant, or push the constant to the stack.  All of the
         instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
        {
          switch (REGNO (chain))
            {
            case AX_REG:
              opcode = 0xb8; break;
            case CX_REG:
              opcode = 0xb9; break;
            default:
              gcc_unreachable ();
            }
        }
      else
        opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
         In the case in which the trampoline stores the static chain on
         the stack, we need to skip the first insn which pushes the
         (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
                           plus_constant (XEXP (m_tramp, 0),
                                          offset - (MEM_P (chain) ? 1 : 0)),
                           NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
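/* Byte layout of the 64-bit trampoline built above when both immediates
   need the full movabs form (an illustration derived from the stores,
   not from the original source):

     49 bb <fnaddr: 8 bytes>    movabs $FNADDR, %r11
     49 ba <chain:  8 bytes>    movabs $CHAIN,  %r10
     49 ff e3                   jmp    *%r11
     90                         nop (pads the final 32-bit store)

   24 bytes in all, which the gcc_assert above checks against
   TRAMPOLINE_SIZE.  */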
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
        quals = TYPE_UNQUALIFIED;
      else
        quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
        itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
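/* Usage sketch (illustration; the exact enumerator names come from
   i386-builtin-types.inc and are assumed here):

     tree v4sf = ix86_get_builtin_type (IX86_BT_V4SF);

   recurses on the vector's base type and amounts to
   build_vector_type_for_mode (float_type_node, V4SFmode); the result
   is then memoized in ix86_builtin_type_tab for later lookups.  */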
/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
        {
          atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
          args = tree_cons (NULL, atype, args);
        }

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTI64,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,
  IX86_BUILTIN_PAUSE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  /* SSE3.  */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3.  */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  /* SSE4.1.  */
  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_DPPD,
  IX86_BUILTIN_DPPS,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,

  IX86_BUILTIN_FLOORPD,
  IX86_BUILTIN_CEILPD,
  IX86_BUILTIN_TRUNCPD,
  IX86_BUILTIN_RINTPD,
  IX86_BUILTIN_ROUNDPD_AZ,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,

  IX86_BUILTIN_FLOORPS,
  IX86_BUILTIN_CEILPS,
  IX86_BUILTIN_TRUNCPS,
  IX86_BUILTIN_RINTPS,
  IX86_BUILTIN_ROUNDPS_AZ,

  IX86_BUILTIN_FLOORPS_SFIX,
  IX86_BUILTIN_CEILPS_SFIX,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,
  IX86_BUILTIN_VEC_PACK_SFIX256,

  /* SSE4.2.  */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_ROUNDPD_AZ256,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,

  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,
  IX86_BUILTIN_ROUNDPS_AZ256,

  IX86_BUILTIN_FLOORPS_SFIX256,
  IX86_BUILTIN_CEILPS_SFIX256,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* AVX2 */
  IX86_BUILTIN_MPSADBW256,
  IX86_BUILTIN_PABSB256,
  IX86_BUILTIN_PABSW256,
  IX86_BUILTIN_PABSD256,
  IX86_BUILTIN_PACKSSDW256,
  IX86_BUILTIN_PACKSSWB256,
  IX86_BUILTIN_PACKUSDW256,
  IX86_BUILTIN_PACKUSWB256,
  IX86_BUILTIN_PADDB256,
  IX86_BUILTIN_PADDW256,
  IX86_BUILTIN_PADDD256,
  IX86_BUILTIN_PADDQ256,
  IX86_BUILTIN_PADDSB256,
  IX86_BUILTIN_PADDSW256,
  IX86_BUILTIN_PADDUSB256,
  IX86_BUILTIN_PADDUSW256,
  IX86_BUILTIN_PALIGNR256,
  IX86_BUILTIN_AND256I,
  IX86_BUILTIN_ANDNOT256I,
  IX86_BUILTIN_PAVGB256,
  IX86_BUILTIN_PAVGW256,
  IX86_BUILTIN_PBLENDVB256,
  IX86_BUILTIN_PBLENDVW256,
  IX86_BUILTIN_PCMPEQB256,
  IX86_BUILTIN_PCMPEQW256,
  IX86_BUILTIN_PCMPEQD256,
  IX86_BUILTIN_PCMPEQQ256,
  IX86_BUILTIN_PCMPGTB256,
  IX86_BUILTIN_PCMPGTW256,
  IX86_BUILTIN_PCMPGTD256,
  IX86_BUILTIN_PCMPGTQ256,
  IX86_BUILTIN_PHADDW256,
  IX86_BUILTIN_PHADDD256,
  IX86_BUILTIN_PHADDSW256,
  IX86_BUILTIN_PHSUBW256,
  IX86_BUILTIN_PHSUBD256,
  IX86_BUILTIN_PHSUBSW256,
  IX86_BUILTIN_PMADDUBSW256,
  IX86_BUILTIN_PMADDWD256,
  IX86_BUILTIN_PMAXSB256,
  IX86_BUILTIN_PMAXSW256,
  IX86_BUILTIN_PMAXSD256,
  IX86_BUILTIN_PMAXUB256,
  IX86_BUILTIN_PMAXUW256,
  IX86_BUILTIN_PMAXUD256,
  IX86_BUILTIN_PMINSB256,
  IX86_BUILTIN_PMINSW256,
  IX86_BUILTIN_PMINSD256,
  IX86_BUILTIN_PMINUB256,
  IX86_BUILTIN_PMINUW256,
  IX86_BUILTIN_PMINUD256,
  IX86_BUILTIN_PMOVMSKB256,
  IX86_BUILTIN_PMOVSXBW256,
  IX86_BUILTIN_PMOVSXBD256,
  IX86_BUILTIN_PMOVSXBQ256,
  IX86_BUILTIN_PMOVSXWD256,
  IX86_BUILTIN_PMOVSXWQ256,
  IX86_BUILTIN_PMOVSXDQ256,
  IX86_BUILTIN_PMOVZXBW256,
  IX86_BUILTIN_PMOVZXBD256,
  IX86_BUILTIN_PMOVZXBQ256,
  IX86_BUILTIN_PMOVZXWD256,
  IX86_BUILTIN_PMOVZXWQ256,
  IX86_BUILTIN_PMOVZXDQ256,
  IX86_BUILTIN_PMULDQ256,
  IX86_BUILTIN_PMULHRSW256,
  IX86_BUILTIN_PMULHUW256,
  IX86_BUILTIN_PMULHW256,
  IX86_BUILTIN_PMULLW256,
  IX86_BUILTIN_PMULLD256,
  IX86_BUILTIN_PMULUDQ256,
  IX86_BUILTIN_POR256,
  IX86_BUILTIN_PSADBW256,
  IX86_BUILTIN_PSHUFB256,
  IX86_BUILTIN_PSHUFD256,
  IX86_BUILTIN_PSHUFHW256,
  IX86_BUILTIN_PSHUFLW256,
  IX86_BUILTIN_PSIGNB256,
  IX86_BUILTIN_PSIGNW256,
  IX86_BUILTIN_PSIGND256,
  IX86_BUILTIN_PSLLDQI256,
  IX86_BUILTIN_PSLLWI256,
  IX86_BUILTIN_PSLLW256,
  IX86_BUILTIN_PSLLDI256,
  IX86_BUILTIN_PSLLD256,
  IX86_BUILTIN_PSLLQI256,
  IX86_BUILTIN_PSLLQ256,
  IX86_BUILTIN_PSRAWI256,
  IX86_BUILTIN_PSRAW256,
  IX86_BUILTIN_PSRADI256,
  IX86_BUILTIN_PSRAD256,
  IX86_BUILTIN_PSRLDQI256,
  IX86_BUILTIN_PSRLWI256,
  IX86_BUILTIN_PSRLW256,
  IX86_BUILTIN_PSRLDI256,
  IX86_BUILTIN_PSRLD256,
  IX86_BUILTIN_PSRLQI256,
  IX86_BUILTIN_PSRLQ256,
  IX86_BUILTIN_PSUBB256,
  IX86_BUILTIN_PSUBW256,
  IX86_BUILTIN_PSUBD256,
  IX86_BUILTIN_PSUBQ256,
  IX86_BUILTIN_PSUBSB256,
  IX86_BUILTIN_PSUBSW256,
  IX86_BUILTIN_PSUBUSB256,
  IX86_BUILTIN_PSUBUSW256,
  IX86_BUILTIN_PUNPCKHBW256,
  IX86_BUILTIN_PUNPCKHWD256,
  IX86_BUILTIN_PUNPCKHDQ256,
  IX86_BUILTIN_PUNPCKHQDQ256,
  IX86_BUILTIN_PUNPCKLBW256,
  IX86_BUILTIN_PUNPCKLWD256,
  IX86_BUILTIN_PUNPCKLDQ256,
  IX86_BUILTIN_PUNPCKLQDQ256,
  IX86_BUILTIN_PXOR256,
  IX86_BUILTIN_MOVNTDQA256,
  IX86_BUILTIN_VBROADCASTSS_PS,
  IX86_BUILTIN_VBROADCASTSS_PS256,
  IX86_BUILTIN_VBROADCASTSD_PD256,
  IX86_BUILTIN_VBROADCASTSI256,
  IX86_BUILTIN_PBLENDD256,
  IX86_BUILTIN_PBLENDD128,
  IX86_BUILTIN_PBROADCASTB256,
  IX86_BUILTIN_PBROADCASTW256,
  IX86_BUILTIN_PBROADCASTD256,
  IX86_BUILTIN_PBROADCASTQ256,
  IX86_BUILTIN_PBROADCASTB128,
  IX86_BUILTIN_PBROADCASTW128,
  IX86_BUILTIN_PBROADCASTD128,
  IX86_BUILTIN_PBROADCASTQ128,
  IX86_BUILTIN_VPERMVARSI256,
  IX86_BUILTIN_VPERMDF256,
  IX86_BUILTIN_VPERMVARSF256,
  IX86_BUILTIN_VPERMDI256,
  IX86_BUILTIN_VPERMTI256,
  IX86_BUILTIN_VEXTRACT128I256,
  IX86_BUILTIN_VINSERT128I256,
  IX86_BUILTIN_MASKLOADD,
  IX86_BUILTIN_MASKLOADQ,
  IX86_BUILTIN_MASKLOADD256,
  IX86_BUILTIN_MASKLOADQ256,
  IX86_BUILTIN_MASKSTORED,
  IX86_BUILTIN_MASKSTOREQ,
  IX86_BUILTIN_MASKSTORED256,
  IX86_BUILTIN_MASKSTOREQ256,
  IX86_BUILTIN_PSLLVV4DI,
  IX86_BUILTIN_PSLLVV2DI,
  IX86_BUILTIN_PSLLVV8SI,
  IX86_BUILTIN_PSLLVV4SI,
  IX86_BUILTIN_PSRAVV8SI,
  IX86_BUILTIN_PSRAVV4SI,
  IX86_BUILTIN_PSRLVV4DI,
  IX86_BUILTIN_PSRLVV2DI,
  IX86_BUILTIN_PSRLVV8SI,
  IX86_BUILTIN_PSRLVV4SI,

  IX86_BUILTIN_GATHERSIV2DF,
  IX86_BUILTIN_GATHERSIV4DF,
  IX86_BUILTIN_GATHERDIV2DF,
  IX86_BUILTIN_GATHERDIV4DF,
  IX86_BUILTIN_GATHERSIV4SF,
  IX86_BUILTIN_GATHERSIV8SF,
  IX86_BUILTIN_GATHERDIV4SF,
  IX86_BUILTIN_GATHERDIV8SF,
  IX86_BUILTIN_GATHERSIV2DI,
  IX86_BUILTIN_GATHERSIV4DI,
  IX86_BUILTIN_GATHERDIV2DI,
  IX86_BUILTIN_GATHERDIV4DI,
  IX86_BUILTIN_GATHERSIV4SI,
  IX86_BUILTIN_GATHERSIV8SI,
  IX86_BUILTIN_GATHERDIV4SI,
  IX86_BUILTIN_GATHERDIV8SI,

  /* Alternate 4 element gather for the vectorizer where
     all operands are 32-byte wide.  */
  IX86_BUILTIN_GATHERALTSIV4DF,
  IX86_BUILTIN_GATHERALTDIV8SF,
  IX86_BUILTIN_GATHERALTSIV4DI,
  IX86_BUILTIN_GATHERALTDIV8SI,

  /* TFmode support builtins.  */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  /* FMA4 instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  /* FMA3 instructions.  */
  IX86_BUILTIN_VFMADDSS3,
  IX86_BUILTIN_VFMADDSD3,

  /* XOP instructions.  */
  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_CLZS,

  /* RTM */
  IX86_BUILTIN_XBEGIN,
  IX86_BUILTIN_XEND,
  IX86_BUILTIN_XABORT,
  IX86_BUILTIN_XTEST,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,
  IX86_BUILTIN_CTZS,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* BMI2 instructions.  */
  IX86_BUILTIN_BZHI32,
  IX86_BUILTIN_BZHI64,
  IX86_BUILTIN_PDEP32,
  IX86_BUILTIN_PDEP64,
  IX86_BUILTIN_PEXT32,
  IX86_BUILTIN_PEXT64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,

  IX86_BUILTIN_MAX
};
25756 /* Table for the ix86 builtin decls. */
25757 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;                  /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  HOST_WIDE_INT isa;                 /* isa_flags this builtin is defined for */
  bool const_p;                      /* true if the declaration is constant */
  bool set_and_not_built_p;          /* true if recorded here but the decl has
                                        not been built yet */
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA, in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
             enum ix86_builtin_func_type tcode,
             enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
          || (mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          tree type = ix86_get_builtin_func_type (tcode);
          decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                                       NULL, NULL_TREE);
          ix86_builtins[(int) code] = decl;
          ix86_builtins_isa[(int) code].set_and_not_built_p = false;
        }
      else
        {
          ix86_builtins[(int) code] = NULL_TREE;
          ix86_builtins_isa[(int) code].tcode = tcode;
          ix86_builtins_isa[(int) code].name = name;
          ix86_builtins_isa[(int) code].const_p = false;
          ix86_builtins_isa[(int) code].set_and_not_built_p = true;
        }
    }

  return decl;
}
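/* Illustrative sketch (not in the original sources): a registration made
   during target initialization would look like

     decl = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                         VOID_FTYPE_VOID, IX86_BUILTIN_EXAMPLE);

   where "__builtin_ia32_example" and IX86_BUILTIN_EXAMPLE are hypothetical
   names used only for illustration.  If SSE2 is not in ix86_isa_flags and
   the front end has no extended-scope hook, the call merely records the
   request in ix86_builtins_isa and returns NULL_TREE; the decl is
   materialized later by ix86_add_new_builtins.  */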
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
                   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
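/* Usage note (sketch, for exposition): marking the decl "const" via
   TREE_READONLY lets the middle end CSE calls with identical arguments,
   so side-effect-free arithmetic builtins go through this wrapper, e.g.

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                        V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);

   If the decl itself is deferred, const_p remembers the request so the
   flag can still be applied when the decl is finally built.  */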
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
          && ix86_builtins_isa[i].set_and_not_built_p)
        {
          tree decl, type;

          /* Don't define the builtin again.  */
          ix86_builtins_isa[i].set_and_not_built_p = false;

          type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
          decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
                                                 type, i, BUILT_IN_MD, NULL,
                                                 NULL_TREE);

          ix86_builtins[i] = decl;
          if (ix86_builtins_isa[i].const_p)
            TREE_READONLY (decl) = 1;
        }
    }
}
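/* Illustrative sketch (assumption, not original text): this is the second
   half of the lazy scheme described above def_builtin.  When
   function-specific options enable a new ISA, e.g.

     __attribute__((target ("avx"))) void f (void);

   the option-switching code can call

     ix86_add_new_builtins (OPTION_MASK_ISA_AVX);

   which walks ix86_builtins_isa and builds, at extended scope, every decl
   that def_builtin recorded but skipped because AVX was off at startup.  */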
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
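/* Illustrative reading of one entry (exposition, not original text): in
   the argument tables below, the final flag field carries the function
   type.  For instance

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3,
       "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS,
       LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },

   says: under -msse, expand __builtin_ia32_cmpgtps through the
   sse_maskcmpv4sf3 pattern using the LT comparison with the operands
   swapped (the _SWAP suffix), since there is no native greater-than
   vector compare.  */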
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
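/* Note (exposition, not original text): in bdesc_pcmpestr and
   bdesc_pcmpistr the final field is reused as a machine mode rather than
   a function type.  (int) CCAmode in the ...pcmpestria128 entry, for
   example, selects which flags-register bit the builtin returns, while
   the index/mask variants (...pcmpestri128, ...pcmpestrm128) leave the
   field 0 and yield the index or mask result instead.  */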
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RTM */
  { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
};
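/* Illustrative sketch (assumption, not original text): the description
   tables are walked at initialization time roughly like

     for (i = 0, d = bdesc_special_args;
          i < ARRAY_SIZE (bdesc_special_args);
          i++, d++)
       {
         if (d->name == 0)
           continue;
         def_builtin (d->mask, d->name,
                      (enum ix86_builtin_func_type) d->flag, d->code);
       }

   mirroring the loop in ix86_init_mmx_sse_builtins; entries with a null
   name (e.g. IX86_BUILTIN_MFENCE above) are skipped here and registered
   by hand elsewhere.  */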
/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
},
26375 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26377 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
26378 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
26379 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
26381 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26382 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
26384 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
26385 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
26387 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
26389 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
26390 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
26391 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
26392 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
26394 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
26395 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26396 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26397 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
26398 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26399 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26400 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
26402 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
26403 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26404 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26405 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
26406 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26407 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26408 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
26410 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26411 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26412 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26413 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26415 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
26416 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
26417 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
26419 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
26421 { OPTION_MASK_ISA_SSE2
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
26422 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
26424 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
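
  /* SSE2 MMX */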
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
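
  /* SSE3 */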
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
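
  /* SSSE3 */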
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
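
  /* SSE4.1 */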
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
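
  /* SSE4.2 */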
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
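
  /* SSE4A */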
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
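
  /* AES */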
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
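
  /* PCLMUL */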
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
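
  /* AVX */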
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
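
  /* AVX2 */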
26716 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
26717 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
26718 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
26719 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
26720 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
26721 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
26722 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
26723 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
26724 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26725 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26726 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26727 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26728 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26729 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26730 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26731 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26732 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
26733 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26734 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26735 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26736 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26737 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
26738 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
26739 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26740 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26741 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26742 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26743 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26744 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26745 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26746 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26747 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26748 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26749 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26750 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26751 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26752 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26753 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
26754 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
26755 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26756 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26757 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26758 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26759 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26760 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26761 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26762 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26763 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26764 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26765 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26766 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};
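
/* Each row above only describes a builtin; the tables are walked later
   by ix86_init_mmx_sse_builtins, which actually registers the names.
   As a user-level illustration (hypothetical code, not part of this
   file), the "__builtin_ia32_pminud256" row with signature
   V8SI_FTYPE_V8SI_V8SI is roughly what avx2intrin.h wraps for
   _mm256_min_epu32:

     __m256i
     min_epu32 (__m256i a, __m256i b)
     {
       return (__m256i) __builtin_ia32_pminud256 ((__v8si) a, (__v8si) b);
     }
*/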
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
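
/* The MULTI_ARG_* names are simple aliases for ix86_builtin_func_type
   enumerators.  struct builtin_description stores the value as an int,
   so the entries below cast with (int) and the registration loop at
   the end of ix86_init_mmx_sse_builtins undoes it, e.g. (a sketch of
   that loop body, see below):

     ftype = (enum ix86_builtin_func_type) d->flag;
     def_builtin_const (d->mask, d->name, ftype, d->code);
*/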
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' cause
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
        {
        case 64:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
        case 128:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
        case 256:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
        }
    }
  return NULL_TREE;
}
/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
        {
        case 64:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
        case 128:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
        case 256:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
        }
    }
  return NULL_TREE;
}
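
/* For example (a hypothetical caller, not in this file), a 128-bit
   vector type selects the M128 variants of both hooks:

     tree v4sf = build_vector_type (float_type_node, 4);
     tree load = ix86_builtin_tm_load (v4sf);   // BUILT_IN_TM_LOAD_M128
     tree store = ix86_builtin_tm_store (v4sf); // BUILT_IN_TM_STORE_M128

   Non-vector types and vector sizes other than 64/128/256 bits yield
   NULL_TREE, as does an uninitialized TM builtin set.  */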
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  if (!flag_tm)
    return;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          tree type, attrs, attrs_type;
          enum built_in_function code = (enum built_in_function) d->code;

          ftype = (enum ix86_builtin_func_type) d->flag;
          type = ix86_get_builtin_func_type (ftype);

          if (BUILTIN_TM_LOAD_P (code))
            {
              attrs = attrs_load;
              attrs_type = attrs_type_load;
            }
          else if (BUILTIN_TM_STORE_P (code))
            {
              attrs = attrs_store;
              attrs_type = attrs_type_store;
            }
          else
            {
              attrs = attrs_log;
              attrs_type = attrs_type_log;
            }

          decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
                                       /* The builtin without the prefix for
                                          calling it directly.  */
                                       d->name + strlen ("__builtin_"),
                                       attrs);

          /* add_builtin_function() will set the DECL_ATTRIBUTES, now
             set the TYPE_ATTRIBUTES.  */
          decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

          set_builtin_decl (code, decl, false);
        }
    }
}
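
/* The name arithmetic above means each bdesc_tm entry is visible under
   two spellings: the front end registers "__builtin__ITM_WM64" while
   the assembler name handed to add_builtin_function skips the prefix.
   A minimal check of that pointer arithmetic (illustrative only):

     const char *name = "__builtin__ITM_WM64";
     const char *asmname = name + strlen ("__builtin_");
     // asmname now points at "_ITM_WM64", the libitm entry point.
*/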
/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA to allow the user to compile particular
   modules with different target specific options that differ from the
   command line options.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
        ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
        ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
        ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
        ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
        ftype = INT_FTYPE_V2DF_V2DF;
      else
        ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
               UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
               "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
               IX86_BUILTIN_MASKMOVQ);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
               VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
               VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
                            VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
               VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
               VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
                     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
                     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
                     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
               INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
               INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
               "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
               IX86_BUILTIN_RDRAND64_STEP);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
               V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
               IX86_BUILTIN_GATHERSIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
               V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
               IX86_BUILTIN_GATHERSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
               V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
               IX86_BUILTIN_GATHERDIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
               V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
               IX86_BUILTIN_GATHERDIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
               V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
               IX86_BUILTIN_GATHERSIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
               V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
               IX86_BUILTIN_GATHERSIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
               V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
               IX86_BUILTIN_GATHERDIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
               V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
               IX86_BUILTIN_GATHERDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
               V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
               IX86_BUILTIN_GATHERSIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
               V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
               IX86_BUILTIN_GATHERSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
               V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
               IX86_BUILTIN_GATHERDIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
               V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
               IX86_BUILTIN_GATHERDIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
               V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
               IX86_BUILTIN_GATHERSIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
               V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
               IX86_BUILTIN_GATHERSIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
               V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
               IX86_BUILTIN_GATHERDIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
               V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
               IX86_BUILTIN_GATHERDIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
               V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
               IX86_BUILTIN_GATHERALTSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
               V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
               IX86_BUILTIN_GATHERALTDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
               V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
               IX86_BUILTIN_GATHERALTSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
               V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
               IX86_BUILTIN_GATHERALTDIV8SI);
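
  /* A user-level sketch of one of the gather builtins above (roughly
     what avx2intrin.h does for _mm256_i32gather_pd; hypothetical code,
     not part of this file):

       __m256d
       gather_pd (double const *base, __m128i index)
       {
         __m256d src = _mm256_setzero_pd ();
         __m256d mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ); // all ones
         return (__m256d) __builtin_ia32_gathersiv4df ((__v4df) src, base,
                                                       (__v4si) index,
                                                       (__v4df) mask, 8);
       }

     matching V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT: source, base
     pointer, index vector, mask, scale.  */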
  def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
                     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
                     V4HI_FTYPE_HI_HI_HI_HI,
                     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
                     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
                     IX86_BUILTIN_VEC_INIT_V8QI);
  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
                     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
                     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
                     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
                     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
                     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_ext_v4hi",
                     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
                     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
                     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
                     "__builtin_ia32_vec_set_v2di",
                     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
                     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
                     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
                     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_set_v4hi",
                     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
                     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
  /* Add FMA4 multi-arg argument instructions */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
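
/* Example (illustrative): these builtins back va_start/va_end in functions
   whose calling convention differs from the target default, e.g.

     int __attribute__ ((ms_abi))
     msabi_sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
	 s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/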
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
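
/* Example (illustrative): once registered, the types are directly usable
   from C with the GNU constant suffixes:

     __float80  ext  = 1.5w;    (XFmode, 80-bit extended precision)
     __float128 quad = 1.5q;    (TFmode, IEEE quadruple precision)
*/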
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE2 isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();

  ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}
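
/* Example (illustrative): __builtin_fabsq registered above is either
   expanded inline or, when SSE2 is unavailable (see ix86_expand_builtin
   below), turned into a call to the libgcc routine __fabstf2:

     __float128 q_abs (__float128 x) { return __builtin_fabsq (x); }
*/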
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
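
/* For instance, if an erroneous source file makes expand_expr hand back
   const0_rtx where a V4SFmode operand is required, the call above replaces
   it with CONST0_RTX (V4SFmode) -- an all-zero const_vector the insn
   patterns accept -- instead of letting the expander crash.  */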
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}
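
/* Example (illustrative): a call such as

     __v8hi r = __builtin_ia32_paddw128 (a, b);

   is routed here (via ix86_expand_args_builtin below) with icode
   CODE_FOR_addv8hi3; tmode, mode0 and mode1 are all V8HImode, the operands
   are forced into registers as needed, and a single
   (set (reg:V8HI ...) (plus:V8HI ...)) insn is emitted.  */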
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
	{
	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
	    {
	      enum insn_code new_icode = icode;

	      switch (icode)
		{
		case CODE_FOR_xop_vpermil2v2df3:
		case CODE_FOR_xop_vpermil2v4sf3:
		case CODE_FOR_xop_vpermil2v4df3:
		case CODE_FOR_xop_vpermil2v8sf3:
		  error ("the last argument must be a 2-bit immediate");
		  return gen_reg_rtx (tmode);
		case CODE_FOR_xop_rotlv2di3:
		  new_icode = CODE_FOR_rotlv2di3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv4si3:
		  new_icode = CODE_FOR_rotlv4si3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv8hi3:
		  new_icode = CODE_FOR_rotlv8hi3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv16qi3:
		  new_icode = CODE_FOR_rotlv16qi3;
		xop_rotl:
		  if (CONST_INT_P (op))
		    {
		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
		      op = GEN_INT (INTVAL (op) & mask);
		      gcc_checking_assert
			(insn_data[icode].operand[i + 1].predicate (op, mode));
		    }
		  else
		    {
		      gcc_checking_assert
			(nargs == 2
			 && insn_data[new_icode].operand[0].mode == tmode
			 && insn_data[new_icode].operand[1].mode == tmode
			 && insn_data[new_icode].operand[2].mode == mode
			 && insn_data[new_icode].operand[0].predicate
			    == insn_data[icode].operand[0].predicate
			 && insn_data[new_icode].operand[1].predicate
			    == insn_data[icode].operand[1].predicate);
		      icode = new_icode;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op, args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
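
/* Example (illustrative, assuming the XOP vprot builtins): the constant
   masking above means a call like

     __v4si r = __builtin_ia32_vprotdi (x, 37);

   has its rotate count reduced modulo the element width (37 & 31 == 5),
   matching the behavior of the generic rotl patterns the insn is
   rewritten to.  */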
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
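
/* Example (illustrative): greater-than comparisons such as
   __builtin_ia32_cmpgtss have no direct hardware encoding; their
   descriptions request swapping, SWAP arrives here as true, the operands
   are exchanged above, and the available less-than pattern is emitted
   instead.  */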
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
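
/* Example (illustrative): for a comi builtin the code above emits,
   schematically,

     (set (reg:SI t) (const_int 0))
     (set (strict_low_part (subreg:QI (reg:SI t) 0))
	  (lt (flags) (const_int 0)))

   so e.g. _mm_comilt_sd yields a clean 0/1 int result with the upper bits
   already zeroed.  */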
/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
				     tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = safe_vector_operand (op0, mode0);
  op1 = safe_vector_operand (op1, mode1);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
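
/* Example (illustrative): _mm_testz_si128 (a, b) arrives here with
   d->comparison == EQ; PTEST sets ZF exactly when (a & b) == 0, and that
   flag is materialized into the QImode low part of the SImode result
   built above.  */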
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
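
/* Example (illustrative):

     int     i = _mm_cmpistri (a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
     __m128i m = _mm_cmpistrm (a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);

   take the PCMPISTRI128 and PCMPISTRM128 arms above, respectively; the
   flag-reading variants (_mm_cmpistrz, _mm_cmpistrc, ...) land in the
   final arm, which extracts the requested bit from FLAGS_REG.  */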
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case V2DF_FTYPE_V2DF_ROUND:
    case V4DF_FTYPE_V4DF_ROUND:
    case V4SF_FTYPE_V4SF_ROUND:
    case V8SF_FTYPE_V8SF_ROUND:
    case V4SI_FTYPE_V4SF_ROUND:
    case V8SI_FTYPE_V8SF_ROUND:
      return ix86_expand_sse_round (d, exp, target);
    case V4SI_FTYPE_V2DF_V2DF_ROUND:
    case V8SI_FTYPE_V4DF_V4DF_ROUND:
      return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case INT_FTYPE_V32QI:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
    case V32QI_FTYPE_V32QI:
    case V32QI_FTYPE_V16QI:
    case V16HI_FTYPE_V16HI:
    case V16HI_FTYPE_V8HI:
    case V8SI_FTYPE_V8SI:
    case V16HI_FTYPE_V16QI:
    case V8SI_FTYPE_V16QI:
    case V4DI_FTYPE_V16QI:
    case V8SI_FTYPE_V8HI:
    case V4DI_FTYPE_V8HI:
    case V4DI_FTYPE_V4SI:
    case V4DI_FTYPE_V2DI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
    case V32QI_FTYPE_V16HI_V16HI:
    case V16HI_FTYPE_V8SI_V8SI:
    case V32QI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V16HI_V16HI:
    case V8SI_FTYPE_V4DF_V4DF:
    case V8SI_FTYPE_V8SI_V8SI:
    case V8SI_FTYPE_V16HI_V16HI:
    case V4DI_FTYPE_V4DI_V4DI:
    case V4DI_FTYPE_V8SI_V8SI:
      if (comparison == UNKNOWN)
	return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V16HI_FTYPE_V16HI_V8HI_COUNT:
    case V16HI_FTYPE_V16HI_SI_COUNT:
    case V8SI_FTYPE_V8SI_V4SI_COUNT:
    case V8SI_FTYPE_V8SI_SI_COUNT:
    case V4DI_FTYPE_V4DI_V2DI_COUNT:
    case V4DI_FTYPE_V4DI_INT_COUNT:
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_INT_CONVERT:
      nargs = 2;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
    case V16HI_FTYPE_V16HI_INT:
    case V8SI_FTYPE_V8SI_INT:
    case V4DI_FTYPE_V4DI_INT:
    case V2DI_FTYPE_V4DI_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
    case V32QI_FTYPE_V32QI_V32QI_V32QI:
      nargs = 3;
      break;
    case V32QI_FTYPE_V32QI_V32QI_INT:
    case V16HI_FTYPE_V16HI_V16HI_INT:
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V4DI_FTYPE_V4DI_V4DI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V4DI_FTYPE_V4DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
      nargs = 3;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
	{
	  /* SIMD shift insns take either an 8-bit immediate or
	     register as count.  But builtin functions take int as
	     count.  If count doesn't match, we put it in register.  */
	  if (!match)
	    {
	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
	      if (!insn_p->operand[i + 1].predicate (op, mode))
		op = copy_to_reg (op);
	    }
	}
      else if ((nargs - i) <= nargs_constant)
	{
	  if (!match)
	    switch (icode)
	      {
	      case CODE_FOR_avx2_inserti128:
	      case CODE_FOR_avx2_extracti128:
		error ("the last argument must be an 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_roundsd:
	      case CODE_FOR_sse4_1_roundss:

	      case CODE_FOR_sse4_1_roundpd:
	      case CODE_FOR_sse4_1_roundps:
	      case CODE_FOR_avx_roundpd256:
	      case CODE_FOR_avx_roundps256:

	      case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
	      case CODE_FOR_sse4_1_roundps_sfix:
	      case CODE_FOR_avx_roundpd_vec_pack_sfix256:
	      case CODE_FOR_avx_roundps_sfix256:

	      case CODE_FOR_sse4_1_blendps:
	      case CODE_FOR_avx_blendpd256:
	      case CODE_FOR_avx_vpermilv4df:
		error ("the last argument must be a 4-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_blendpd:
	      case CODE_FOR_avx_vpermilv2df:
	      case CODE_FOR_xop_vpermil2v2df3:
	      case CODE_FOR_xop_vpermil2v4sf3:
	      case CODE_FOR_xop_vpermil2v4df3:
	      case CODE_FOR_xop_vpermil2v8sf3:
		error ("the last argument must be a 2-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vextractf128v4df:
	      case CODE_FOR_avx_vextractf128v8sf:
	      case CODE_FOR_avx_vextractf128v8si:
	      case CODE_FOR_avx_vinsertf128v4df:
	      case CODE_FOR_avx_vinsertf128v8sf:
	      case CODE_FOR_avx_vinsertf128v8si:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vmcmpv2df3:
	      case CODE_FOR_avx_vmcmpv4sf3:
	      case CODE_FOR_avx_cmpv2df3:
	      case CODE_FOR_avx_cmpv4sf3:
	      case CODE_FOR_avx_cmpv4df3:
	      case CODE_FOR_avx_cmpv8sf3:
		error ("the last argument must be a 5-bit immediate");
		return const0_rtx;

	      default:
		switch (nargs_constant)
		  {
		  case 2:
		    if ((nargs - i) == nargs_constant)
		      {
			error ("the next to last argument must be an 8-bit immediate");
			break;
		      }
		  case 1:
		    error ("the last argument must be an 8-bit immediate");
		    break;
		  default:
		    gcc_unreachable ();
		  }
		return const0_rtx;
	      }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
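
/* Example (illustrative): the immediate checks above are what reject e.g.

     _mm_round_pd (x, 42);

   at compile time, since CODE_FOR_sse4_1_roundpd only accepts a 4-bit
   rounding-control immediate.  */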
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
				  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      if (icode == CODE_FOR_avx_vzeroupper)
	target = GEN_INT (vzeroupper_intrinsic);
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
    case INT_FTYPE_VOID:
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V4DI_FTYPE_PV4DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PLONGLONG_LONGLONG:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
    case V8SI_FTYPE_PCV8SI_V8SI:
    case V4DI_FTYPE_PCV4DI_V4DI:
    case V4SI_FTYPE_PCV4SI_V4SI:
    case V2DI_FTYPE_PCV2DI_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
    case VOID_FTYPE_PV8SI_V8SI_V8SI:
    case VOID_FTYPE_PV4DI_V4DI_V4DI:
    case VOID_FTYPE_PV4SI_V4SI_V4SI:
    case VOID_FTYPE_PV2DI_V2DI_V2DI:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
	{
	  if (GET_MODE (op) != Pmode)
	    op = convert_to_mode (Pmode, op, 1);
	  target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
	}
      else
	target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
    }

  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    {
	      if (icode == CODE_FOR_lwp_lwpvalsi3
		  || icode == CODE_FOR_lwp_lwpinssi3
		  || icode == CODE_FOR_lwp_lwpvaldi3
		  || icode == CODE_FOR_lwp_lwpinsdi3)
		error ("the last argument must be a 32-bit immediate");
	      else
		error ("the last argument must be an 8-bit immediate");
	      return const0_rtx;
	    }
	}
      else
	{
	  if (i == memory)
	    {
	      /* This must be the memory operand.  */
	      if (GET_MODE (op) != Pmode)
		op = convert_to_mode (Pmode, op, 1);
	      op = gen_rtx_MEM (mode, force_reg (Pmode, op));
	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	    }
	  else
	    {
	      /* This must be register.  */
	      if (VECTOR_MODE_P (mode))
		op = safe_vector_operand (op, mode);

	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	      op = copy_to_mode_reg (mode, op);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
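
/* Example (illustrative): V4SFmode has four subparts, so for
   __builtin_ia32_vec_ext_v4sf the selector must be a literal in 0..3;
   __builtin_ia32_vec_ext_v4sf (v, 5) is diagnosed by the error above and
   element 0 is used instead.  */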
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
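
/* Example (illustrative): _mm_insert_epi16 (v, s, 3) expands through the
   function above; because of the copy it returns a modified copy of V and
   leaves the original vector unchanged, matching the value semantics of
   the intrinsic.  */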
29252 /* Expand an expression EXP that calls a built-in function,
29253 with result going to TARGET if that's convenient
29254 (and in mode MODE if that's convenient).
29255 SUBTARGET may be used as the target for computing one of EXP's operands.
29256 IGNORE is nonzero if the value is to be ignored. */
29259 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
29260 enum machine_mode mode ATTRIBUTE_UNUSED
,
29261 int ignore ATTRIBUTE_UNUSED
)
29263 const struct builtin_description
*d
;
29265 enum insn_code icode
;
29266 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
29267 tree arg0
, arg1
, arg2
, arg3
, arg4
;
29268 rtx op0
, op1
, op2
, op3
, op4
, pat
;
29269 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
29270 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
29272 /* Determine whether the builtin function is available under the current ISA.
29273 Originally the builtin was not created if it wasn't applicable to the
29274 current ISA based on the command line switches. With function specific
29275 options, we need to check in the context of the function making the call
29276 whether it is supported. */
29277 if (ix86_builtins_isa
[fcode
].isa
29278 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
29280 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
29281 NULL
, (enum fpmath_unit
) 0, false);
29284 error ("%qE needs unknown isa option", fndecl
);
29287 gcc_assert (opts
!= NULL
);
29288 error ("%qE needs isa option %s", fndecl
, opts
);
29296 case IX86_BUILTIN_MASKMOVQ
:
29297 case IX86_BUILTIN_MASKMOVDQU
:
29298 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
29299 ? CODE_FOR_mmx_maskmovq
29300 : CODE_FOR_sse2_maskmovdqu
);
29301 /* Note the arg order is different from the operand order. */
29302 arg1
= CALL_EXPR_ARG (exp
, 0);
29303 arg2
= CALL_EXPR_ARG (exp
, 1);
29304 arg0
= CALL_EXPR_ARG (exp
, 2);
29305 op0
= expand_normal (arg0
);
29306 op1
= expand_normal (arg1
);
29307 op2
= expand_normal (arg2
);
29308 mode0
= insn_data
[icode
].operand
[0].mode
;
29309 mode1
= insn_data
[icode
].operand
[1].mode
;
29310 mode2
= insn_data
[icode
].operand
[2].mode
;
29312 if (GET_MODE (op0
) != Pmode
)
29313 op0
= convert_to_mode (Pmode
, op0
, 1);
29314 op0
= gen_rtx_MEM (mode1
, force_reg (Pmode
, op0
));
29316 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
29317 op0
= copy_to_mode_reg (mode0
, op0
);
29318 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
29319 op1
= copy_to_mode_reg (mode1
, op1
);
29320 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
29321 op2
= copy_to_mode_reg (mode2
, op2
);
29322 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
29328 case IX86_BUILTIN_LDMXCSR
:
29329 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
29330 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
29331 emit_move_insn (target
, op0
);
29332 emit_insn (gen_sse_ldmxcsr (target
));
29335 case IX86_BUILTIN_STMXCSR
:
29336 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
29337 emit_insn (gen_sse_stmxcsr (target
));
29338 return copy_to_mode_reg (SImode
, target
);
29340 case IX86_BUILTIN_CLFLUSH
:
29341 arg0
= CALL_EXPR_ARG (exp
, 0);
29342 op0
= expand_normal (arg0
);
29343 icode
= CODE_FOR_sse2_clflush
;
29344 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
29346 if (GET_MODE (op0
) != Pmode
)
29347 op0
= convert_to_mode (Pmode
, op0
, 1);
29348 op0
= force_reg (Pmode
, op0
);
29351 emit_insn (gen_sse2_clflush (op0
));
29354 case IX86_BUILTIN_MONITOR
:
29355 arg0
= CALL_EXPR_ARG (exp
, 0);
29356 arg1
= CALL_EXPR_ARG (exp
, 1);
29357 arg2
= CALL_EXPR_ARG (exp
, 2);
29358 op0
= expand_normal (arg0
);
29359 op1
= expand_normal (arg1
);
29360 op2
= expand_normal (arg2
);
29363 if (GET_MODE (op0
) != Pmode
)
29364 op0
= convert_to_mode (Pmode
, op0
, 1);
29365 op0
= force_reg (Pmode
, op0
);
29368 op1
= copy_to_mode_reg (SImode
, op1
);
29370 op2
= copy_to_mode_reg (SImode
, op2
);
29371 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
29374 case IX86_BUILTIN_MWAIT
:
29375 arg0
= CALL_EXPR_ARG (exp
, 0);
29376 arg1
= CALL_EXPR_ARG (exp
, 1);
29377 op0
= expand_normal (arg0
);
29378 op1
= expand_normal (arg1
);
29380 op0
= copy_to_mode_reg (SImode
, op0
);
29382 op1
= copy_to_mode_reg (SImode
, op1
);
29383 emit_insn (gen_sse3_mwait (op0
, op1
));
29386 case IX86_BUILTIN_VEC_INIT_V2SI
:
29387 case IX86_BUILTIN_VEC_INIT_V4HI
:
29388 case IX86_BUILTIN_VEC_INIT_V8QI
:
29389 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
29391 case IX86_BUILTIN_VEC_EXT_V2DF
:
29392 case IX86_BUILTIN_VEC_EXT_V2DI
:
29393 case IX86_BUILTIN_VEC_EXT_V4SF
:
29394 case IX86_BUILTIN_VEC_EXT_V4SI
:
29395 case IX86_BUILTIN_VEC_EXT_V8HI
:
29396 case IX86_BUILTIN_VEC_EXT_V2SI
:
29397 case IX86_BUILTIN_VEC_EXT_V4HI
:
29398 case IX86_BUILTIN_VEC_EXT_V16QI
:
29399 return ix86_expand_vec_ext_builtin (exp
, target
);
29401 case IX86_BUILTIN_VEC_SET_V2DI
:
29402 case IX86_BUILTIN_VEC_SET_V4SF
:
29403 case IX86_BUILTIN_VEC_SET_V4SI
:
29404 case IX86_BUILTIN_VEC_SET_V8HI
:
29405 case IX86_BUILTIN_VEC_SET_V4HI
:
29406 case IX86_BUILTIN_VEC_SET_V16QI
:
29407 return ix86_expand_vec_set_builtin (exp
);
29409 case IX86_BUILTIN_INFQ
:
29410 case IX86_BUILTIN_HUGE_VALQ
:
29412 REAL_VALUE_TYPE inf
;
29416 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
29418 tmp
= validize_mem (force_const_mem (mode
, tmp
));
29421 target
= gen_reg_rtx (mode
);
29423 emit_move_insn (target
, tmp
);
29427 case IX86_BUILTIN_LLWPCB
:
29428 arg0
= CALL_EXPR_ARG (exp
, 0);
29429 op0
= expand_normal (arg0
);
29430 icode
= CODE_FOR_lwp_llwpcb
;
29431 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
29433 if (GET_MODE (op0
) != Pmode
)
29434 op0
= convert_to_mode (Pmode
, op0
, 1);
29435 op0
= force_reg (Pmode
, op0
);
29437 emit_insn (gen_lwp_llwpcb (op0
));
29440 case IX86_BUILTIN_SLWPCB
:
29441 icode
= CODE_FOR_lwp_slwpcb
;
29443 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
29444 target
= gen_reg_rtx (Pmode
);
29445 emit_insn (gen_lwp_slwpcb (target
));
29448 case IX86_BUILTIN_BEXTRI32
:
29449 case IX86_BUILTIN_BEXTRI64
:
29450 arg0
= CALL_EXPR_ARG (exp
, 0);
29451 arg1
= CALL_EXPR_ARG (exp
, 1);
29452 op0
= expand_normal (arg0
);
29453 op1
= expand_normal (arg1
);
29454 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
29455 ? CODE_FOR_tbm_bextri_si
29456 : CODE_FOR_tbm_bextri_di
);
29457 if (!CONST_INT_P (op1
))
29459 error ("last argument must be an immediate");
29464 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
29465 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
29466 op1
= GEN_INT (length
);
29467 op2
= GEN_INT (lsb_index
);
29468 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
29474 case IX86_BUILTIN_RDRAND16_STEP
:
29475 icode
= CODE_FOR_rdrandhi_1
;
29479 case IX86_BUILTIN_RDRAND32_STEP
:
29480 icode
= CODE_FOR_rdrandsi_1
;
29484 case IX86_BUILTIN_RDRAND64_STEP
:
29485 icode
= CODE_FOR_rdranddi_1
;
29489 op0
= gen_reg_rtx (mode0
);
29490 emit_insn (GEN_FCN (icode
) (op0
));
29492 arg0
= CALL_EXPR_ARG (exp
, 0);
29493 op1
= expand_normal (arg0
);
29494 if (!address_operand (op1
, VOIDmode
))
29496 op1
= convert_memory_address (Pmode
, op1
);
29497 op1
= copy_addr_to_reg (op1
);
29499 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
29501 op1
= gen_reg_rtx (SImode
);
29502 emit_move_insn (op1
, CONST1_RTX (SImode
));
29504 /* Emit SImode conditional move. */
29505 if (mode0
== HImode
)
29507 op2
= gen_reg_rtx (SImode
);
29508 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
29510 else if (mode0
== SImode
)
29513 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
29516 target
= gen_reg_rtx (SImode
);
29518 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
29520 emit_insn (gen_rtx_SET (VOIDmode
, target
,
29521 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
29524 case IX86_BUILTIN_GATHERSIV2DF
:
29525 icode
= CODE_FOR_avx2_gathersiv2df
;
29527 case IX86_BUILTIN_GATHERSIV4DF
:
29528 icode
= CODE_FOR_avx2_gathersiv4df
;
29530 case IX86_BUILTIN_GATHERDIV2DF
:
29531 icode
= CODE_FOR_avx2_gatherdiv2df
;
29533 case IX86_BUILTIN_GATHERDIV4DF
:
29534 icode
= CODE_FOR_avx2_gatherdiv4df
;
29536 case IX86_BUILTIN_GATHERSIV4SF
:
29537 icode
= CODE_FOR_avx2_gathersiv4sf
;
29539 case IX86_BUILTIN_GATHERSIV8SF
:
29540 icode
= CODE_FOR_avx2_gathersiv8sf
;
29542 case IX86_BUILTIN_GATHERDIV4SF
:
29543 icode
= CODE_FOR_avx2_gatherdiv4sf
;
29545 case IX86_BUILTIN_GATHERDIV8SF
:
29546 icode
= CODE_FOR_avx2_gatherdiv8sf
;
29548 case IX86_BUILTIN_GATHERSIV2DI
:
29549 icode
= CODE_FOR_avx2_gathersiv2di
;
29551 case IX86_BUILTIN_GATHERSIV4DI
:
29552 icode
= CODE_FOR_avx2_gathersiv4di
;
29554 case IX86_BUILTIN_GATHERDIV2DI
:
29555 icode
= CODE_FOR_avx2_gatherdiv2di
;
29557 case IX86_BUILTIN_GATHERDIV4DI
:
29558 icode
= CODE_FOR_avx2_gatherdiv4di
;
29560 case IX86_BUILTIN_GATHERSIV4SI
:
29561 icode
= CODE_FOR_avx2_gathersiv4si
;
29563 case IX86_BUILTIN_GATHERSIV8SI
:
29564 icode
= CODE_FOR_avx2_gathersiv8si
;
29566 case IX86_BUILTIN_GATHERDIV4SI
:
29567 icode
= CODE_FOR_avx2_gatherdiv4si
;
29569 case IX86_BUILTIN_GATHERDIV8SI
:
29570 icode
= CODE_FOR_avx2_gatherdiv8si
;
29572 case IX86_BUILTIN_GATHERALTSIV4DF
:
29573 icode
= CODE_FOR_avx2_gathersiv4df
;
29575 case IX86_BUILTIN_GATHERALTDIV8SF
:
29576 icode
= CODE_FOR_avx2_gatherdiv8sf
;
29578 case IX86_BUILTIN_GATHERALTSIV4DI
:
29579 icode
= CODE_FOR_avx2_gathersiv4di
;
29581 case IX86_BUILTIN_GATHERALTDIV8SI
:
29582 icode
= CODE_FOR_avx2_gatherdiv8si
;
29586 arg0
= CALL_EXPR_ARG (exp
, 0);
29587 arg1
= CALL_EXPR_ARG (exp
, 1);
29588 arg2
= CALL_EXPR_ARG (exp
, 2);
29589 arg3
= CALL_EXPR_ARG (exp
, 3);
29590 arg4
= CALL_EXPR_ARG (exp
, 4);
29591 op0
= expand_normal (arg0
);
29592 op1
= expand_normal (arg1
);
29593 op2
= expand_normal (arg2
);
29594 op3
= expand_normal (arg3
);
29595 op4
= expand_normal (arg4
);
29596 /* Note the arg order is different from the operand order. */
29597 mode0
= insn_data
[icode
].operand
[1].mode
;
29598 mode2
= insn_data
[icode
].operand
[3].mode
;
29599 mode3
= insn_data
[icode
].operand
[4].mode
;
29600 mode4
= insn_data
[icode
].operand
[5].mode
;
29602 if (target
== NULL_RTX
29603 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
29604 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
29606 subtarget
= target
;
29608 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
29609 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
29611 rtx half
= gen_reg_rtx (V4SImode
);
29612 if (!nonimmediate_operand (op2
, V8SImode
))
29613 op2
= copy_to_mode_reg (V8SImode
, op2
);
29614 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
29617 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
29618 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
29620 rtx (*gen
) (rtx
, rtx
);
29621 rtx half
= gen_reg_rtx (mode0
);
29622 if (mode0
== V4SFmode
)
29623 gen
= gen_vec_extract_lo_v8sf
;
29625 gen
= gen_vec_extract_lo_v8si
;
29626 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
29627 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
29628 emit_insn (gen (half
, op0
));
29630 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
29631 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
29632 emit_insn (gen (half
, op3
));
      /* Force memory operand only with base register here.  But we
         don't want to do it on memory operand for other builtin
         functions.  */
      if (GET_MODE (op1) != Pmode)
        op1 = convert_to_mode (Pmode, op1, 1);
      op1 = force_reg (Pmode, op1);

      if (!insn_data[icode].operand[1].predicate (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[2].predicate (op1, Pmode))
        op1 = copy_to_mode_reg (Pmode, op1);
      if (!insn_data[icode].operand[3].predicate (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      if (!insn_data[icode].operand[4].predicate (op3, mode3))
        op3 = copy_to_mode_reg (mode3, op3);
      if (!insn_data[icode].operand[5].predicate (op4, mode4))
        {
          error ("last argument must be scale 1, 2, 4, 8");
          return const0_rtx;
        }
      /* Optimize.  If mask is known to have all high bits set,
         replace op0 with pc_rtx to signal that the instruction
         overwrites the whole destination and doesn't use its
         previous contents.  */
      if (optimize)
        {
          if (TREE_CODE (arg3) == VECTOR_CST)
            {
              unsigned int negative = 0;
              for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
                {
                  tree cst = VECTOR_CST_ELT (arg3, i);
                  if (TREE_CODE (cst) == INTEGER_CST
                      && tree_int_cst_sign_bit (cst))
                    negative++;
                  else if (TREE_CODE (cst) == REAL_CST
                           && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
                    negative++;
                }
              if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
                op0 = pc_rtx;
            }
          else if (TREE_CODE (arg3) == SSA_NAME)
            {
              /* Recognize also when mask is like:
                 __v2df src = _mm_setzero_pd ();
                 __v2df mask = _mm_cmpeq_pd (src, src);
                 or
                 __v8sf src = _mm256_setzero_ps ();
                 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
                 as that is a cheaper way to load all ones into
                 a register than having to load a constant from
                 memory.  */
              gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
              if (is_gimple_call (def_stmt))
                {
                  tree fndecl = gimple_call_fndecl (def_stmt);
                  if (fndecl
                      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
                    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
                      {
                      case IX86_BUILTIN_CMPPD:
                      case IX86_BUILTIN_CMPPS:
                      case IX86_BUILTIN_CMPPD256:
                      case IX86_BUILTIN_CMPPS256:
                        if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
                          break;
                        /* FALLTHRU */
                      case IX86_BUILTIN_CMPEQPD:
                      case IX86_BUILTIN_CMPEQPS:
                        if (initializer_zerop (gimple_call_arg (def_stmt, 0))
                            && initializer_zerop (gimple_call_arg (def_stmt,
                                                                   1)))
                          op0 = pc_rtx;
                        break;
                      default:
                        break;
                      }
                }
            }
        }
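      /* Illustrative example (an assumption of typical user code, not
         part of the compiler): source such as

           __m256 src  = _mm256_setzero_ps ();
           __m256 mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
           __m256 v    = _mm256_mask_i32gather_ps (src, base, idx, mask, 4);

         reaches this expander with ARG3 defined by the cmp builtin with
         both operands zero, so the check above fires and OP0 becomes
         pc_rtx: the gather is then emitted as overwriting its whole
         destination instead of merging into SRC.  */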
      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
      if (! pat)
        return const0_rtx;
      emit_insn (pat);

      if (fcode == IX86_BUILTIN_GATHERDIV8SF
          || fcode == IX86_BUILTIN_GATHERDIV8SI)
        {
          enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
                                    ? V4SFmode : V4SImode;
          if (target == NULL_RTX)
            target = gen_reg_rtx (tmode);
          if (tmode == V4SFmode)
            emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
          else
            emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
        }
      else
        target = subtarget;

      return target;
    case IX86_BUILTIN_XABORT:
      icode = CODE_FOR_xabort;
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!insn_data[icode].operand[0].predicate (op0, mode0))
        {
          error ("the xabort's argument must be an 8-bit immediate");
          return const0_rtx;
        }
      emit_insn (gen_xabort (op0));
      return 0;

    default:
      break;
    }
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);

  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
        {
        case IX86_BUILTIN_FABSQ:
        case IX86_BUILTIN_COPYSIGNQ:
          if (!TARGET_SSE2)
            /* Emit a normal call if SSE2 isn't available.  */
            return expand_call (exp, target, ignore);
        default:
          return ix86_expand_args_builtin (d, exp, target);
        }

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
                                            (enum ix86_builtin_func_type)
                                            d->flag, d->comparison);

  gcc_unreachable ();
}
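/* For reference, the dispatch above is table-driven: each bdesc_* array
   holds builtin_description entries, roughly of the shape (a sketch,
   abbreviated from this file's tables; exact rows may differ):

     { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sqrtv2df2,
       "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN,
       (int) V2DF_FTYPE_V2DF },

   so adding a builtin with regular operand handling is just a new table
   row; only builtins needing irregular expansion get explicit cases in
   the switch.  */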
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
                                  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_SQRTPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_SQRTPD256];
        }
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
        }
      break;
    case BUILT_IN_IFLOOR:
    case BUILT_IN_LFLOOR:
    case BUILT_IN_LLFLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IFLOORF:
    case BUILT_IN_LFLOORF:
    case BUILT_IN_LLFLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
        }
      break;

    case BUILT_IN_ICEIL:
    case BUILT_IN_LCEIL:
    case BUILT_IN_LLCEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_ICEILF:
    case BUILT_IN_LCEILF:
    case BUILT_IN_LLCEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
        }
      break;
    case BUILT_IN_IRINT:
    case BUILT_IN_LRINT:
    case BUILT_IN_LLRINT:
      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IRINTF:
    case BUILT_IN_LRINTF:
    case BUILT_IN_LLRINTF:
      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
        }
      break;
    case BUILT_IN_IROUND:
    case BUILT_IN_LROUND:
    case BUILT_IN_LLROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IROUNDF:
    case BUILT_IN_LROUNDF:
    case BUILT_IN_LLROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
        }
      break;
    case BUILT_IN_COPYSIGN:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
        }
      break;

    case BUILT_IN_COPYSIGNF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
        }
      break;
    case BUILT_IN_FLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_FLOORPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPD256];
        }
      break;

    case BUILT_IN_FLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_FLOORPS256];
        }
      break;

    case BUILT_IN_CEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CEILPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPD256];
        }
      break;

    case BUILT_IN_CEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CEILPS256];
        }
      break;
    case BUILT_IN_TRUNC:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_TRUNCPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
        }
      break;

    case BUILT_IN_TRUNCF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_TRUNCPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
        }
      break;

    case BUILT_IN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_RINTPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_RINTPD256];
        }
      break;

    case BUILT_IN_RINTF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_RINTPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_RINTPS256];
        }
      break;
    case BUILT_IN_ROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
        }
      break;

    case BUILT_IN_ROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
        }
      break;
    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_VFMADDPD];
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
        }
      break;

    case BUILT_IN_FMAF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VFMADDPS];
          if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
        }
      break;

    default:
      break;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
                                type_in);

  return NULL_TREE;
}
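/* Illustrative example (user code, not part of the compiler): with
   -O2 -mavx -ffast-math, a loop such as

     void f (double *a, double *b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = __builtin_sqrt (b[i]);
     }

   queries this hook with BUILT_IN_SQRT and V4DF vector types, and gets
   back the decl for IX86_BUILTIN_SQRTPD256, i.e. a vsqrtpd processing
   four doubles per iteration.  */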
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
    case BUILT_IN_ATAN:
    case BUILT_IN_ATAN2:
    case BUILT_IN_ATANH:
    case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
    case BUILT_IN_ASINH:
    case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
    case BUILT_IN_ACOSH:
    case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
        return NULL_TREE;
      break;

    case BUILT_IN_EXPF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG10F:
    case BUILT_IN_POWF:
    case BUILT_IN_TANHF:
    case BUILT_IN_TANF:
    case BUILT_IN_ATANF:
    case BUILT_IN_ATAN2F:
    case BUILT_IN_ATANHF:
    case BUILT_IN_CBRTF:
    case BUILT_IN_SINHF:
    case BUILT_IN_SINF:
    case BUILT_IN_ASINHF:
    case BUILT_IN_ASINF:
    case BUILT_IN_COSHF:
    case BUILT_IN_COSF:
    case BUILT_IN_ACOSHF:
    case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));

  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (el_mode == SFmode)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
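/* Illustrative mangling results (assuming the SVML scheme above): a
   V4SF sinf becomes "vmlssin4" and then "vmlsSin4" after the in-place
   upcasing of name[4]; a V2DF pow becomes "vmldPow2"; log/logf are
   special-cased to "vmldLn2"/"vmlsLn4".  */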
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_POW:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
          || n != 2)
        return NULL_TREE;
      break;

    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
          || n != 4)
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
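/* Illustrative mangling results (assuming the ACML scheme above): sin
   in a V2DF loop maps to "__vrd2_sin", cosf in a V4SF loop to
   "__vrs4_cosf"; the ".." placeholders in the template are patched with
   'd'/'2' or 's'/'4' before the scalar name is appended at offset 7.  */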
/* Returns a decl of a function that implements gather load with
   memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_gather (const_tree mem_vectype,
                               const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  if (! TARGET_AVX2)
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
          && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*gather* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (mem_vectype))
    {
    case V2DFmode:
      code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
      break;
    case V4DFmode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
      break;
    case V2DImode:
      code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
      break;
    case V4DImode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
      break;
    case V4SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
      break;
    case V8SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
      break;
    case V4SImode:
      code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
      break;
    case V8SImode:
      code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
      break;
    default:
      return NULL_TREE;
    }

  return ix86_builtins[code];
}
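/* Illustrative example (user code, not part of the compiler): with
   -O3 -mavx2, an indexed-load loop such as

     void f (double *out, double *base, int *idx, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = base[idx[i]];
     }

   queries this hook with a V4DF memory type and an SImode index type
   and receives IX86_BUILTIN_GATHERALTSIV4DF, i.e. a vgatherdpd.  */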
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
                         bool sqrt ATTRIBUTE_UNUSED)
{
  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
         && flag_finite_math_only && !flag_trapping_math
         && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
        /* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
        return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      case IX86_BUILTIN_SQRTPS_NR256:
        return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];

      default:
        return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
        /* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
        return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
        return NULL_TREE;
      }
}
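/* For reference (a sketch of the standard refinement, not necessarily
   the exact sequence the expanders emit): rsqrtps only approximates
   1/sqrt(a), so its consumers add one Newton-Raphson step,

     x1 = x0 * (1.5 - 0.5 * a * x0 * x0)   where x0 = rsqrtps (a),

   which restores roughly single-precision accuracy; hence the guard
   above requiring -ffinite-math-only and -funsafe-math-optimizations.  */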
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
        return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
         a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
        {
          if (ipar[i] >= 2)
            return 0;
          mask |= ipar[i] << i;
        }
      for (i = 2; i < 4; ++i)
        {
          if (ipar[i] < 2)
            return 0;
          mask |= (ipar[i] - 2) << i;
        }
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
         within the low 128-bit lane, but the high 128-bit lane must
         mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
        if (ipar[i] + 4 != ipar[i + 4])
          return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
         the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
        mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
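/* Worked example (derived from the code above, V4SFmode): the parallel
   [1 0 3 2] packs element selectors e0..e3 two bits apiece, giving
   1 | 0<<2 | 3<<4 | 2<<6 = 0xb1, so the function returns 0xb1 + 1 and
   the caller subtracts one to get the vpermilps imm8.  */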
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
        return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
        return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
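/* Worked example (derived from the code above, V4DFmode): the parallel
   [2 3 0 1] takes the high half first, so the low destination half
   gets e = 2/2 = 1 and the high half e = 0/2 = 0, i.e. mask 0x01 and a
   return value of 0x02; imm8 0x01 is the vperm2f128 encoding that
   swaps the two 128-bit lanes.  */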
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */

rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (ix86_using_red_zone ())
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
            gen_rtx_SET (VOIDmode,
                         gen_rtx_MEM (DImode,
                                      gen_rtx_PRE_DEC (DImode,
                                                       stack_pointer_rtx)),
                         operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_double_mode (mode, &operand, 1, operands, operands + 1);
            emit_insn (
              gen_rtx_SET (VOIDmode,
                           gen_rtx_MEM (SImode,
                                        gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                           operands[1]));
            emit_insn (
              gen_rtx_SET (VOIDmode,
                           gen_rtx_MEM (SImode,
                                        gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                           operands[0]));
          }
          break;
        case HImode:
          /* Store HImodes as SImodes.  */
          operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
            gen_rtx_SET (VOIDmode,
                         gen_rtx_MEM (GET_MODE (operand),
                                      gen_rtx_PRE_DEC (SImode,
                                                       stack_pointer_rtx)),
                         operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.  */

void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!ix86_using_red_zone ())
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
        return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
         zero above.  We only want to wind up preferring 80387 registers if
         we plan on doing computation with them.  */
      if (TARGET_80387
          && standard_80387_constant_p (x) > 0)
        {
          /* Limit class to non-sse.  */
          if (regclass == FLOAT_SSE_REGS)
            return FLOAT_REGS;
          if (regclass == FP_TOP_SSE_REGS)
            return FP_TOP_REG;
          if (regclass == FP_SECOND_SSE_REGS)
            return FP_SECOND_REG;
          if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
            return regclass;
        }

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
        return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
        return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
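/* Illustrative consequence (a sketch, not from a dump): asking to
   reload a nonzero (const_double:SF) into an SSE class hits the
   CONSTANT_P check above and is answered with NO_REGS, so reload
   spills the constant to the literal pool and loads it with movss from
   memory, since SSE has no load-immediate form.  */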
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  mode = GET_MODE (x);
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
        return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
        return FP_SECOND_REG;
      else
        return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
                       enum machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && rclass == GENERAL_REGS
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
                    ? CODE_FOR_reload_noff_load
                    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (!TARGET_64BIT
      && !in_p && mode == QImode
      && (rclass == GENERAL_REGS
          || rclass == LEGACY_REGS
          || rclass == INDEX_REGS))
    {
      int regno;

      if (REG_P (x))
        regno = REGNO (x);
      else
        regno = -1;

      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
        regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
        return Q_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
          (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
                                       (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
           (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
        reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
        SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
        reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
        reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
        reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
    case AREG:
    case DREG:
    case CREG:
    case BREG:
    case AD_REGS:
    case CLOBBERED_REGS:
    case Q_REGS:
    case NON_Q_REGS:
    case SIREG:
    case DIREG:
    case SSE_FIRST_REG:
    case FP_TOP_REG:
    case FP_SECOND_REG:
      return true;

    default:
      break;
    }

  return false;
}
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, allow inline variant.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                                enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
        return true;

      /* If the target says that inter-unit moves are more expensive
         than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES)
        return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        return true;
    }

  return false;
}

bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                              enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
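/* Illustrative example (a sketch): copying a DFmode value between an
   x87 stack register and an SSE register has no direct instruction, so
   inline_secondary_memory_needed returns true and the allocator routes
   the copy through a stack slot, e.g. fstpl to memory followed by a
   movsd load into %xmm0.  */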
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
        return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
        return (TARGET_64BIT ? 4 : 6);
      else
        return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
        return 2;
      else
        return 1;
    }
}
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
                               enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
         disallow a change to these modes, reload will assume it's ok to
         drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
         the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
        return true;

      /* Vector registers do not support subreg with nonzero offsets, which
         are otherwise valid for integer registers.  Since we can't see
         whether we have a nonzero offset from here, prohibit all
         nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
        return true;
    }

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
                         int in)
{
  int cost;

  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (Q_CLASS_P (regclass) || TARGET_64BIT)
        {
          if (!in)
            return ix86_cost->int_store[0];
          if (TARGET_PARTIAL_REG_DEPENDENCY
              && optimize_function_for_speed_p (cfun))
            cost = ix86_cost->movzbl_load;
          else
            cost = ix86_cost->int_load[0];
          if (in == 2)
            return MAX (cost, ix86_cost->int_store[0]);
          return cost;
        }
      else
        {
          if (in == 2)
            return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
          if (in)
            return ix86_cost->movzbl_load;
          else
            return ix86_cost->int_store[0] + 4;
        }
      break;
    case 2:
      if (in == 2)
        return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      if (in == 2)
        cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]);
      else if (in)
        cost = ix86_cost->int_load[2];
      else
        cost = ix86_cost->int_store[2];
      return (cost * (((int) GET_MODE_SIZE (mode)
                      + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}

static int
ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
                       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
                         reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (targetm.class_max_nregs (class1, mode)
          > targetm.class_max_nregs (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
/* Return TRUE if hard register REGNO can hold a value of machine-mode
   MODE.  */

bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
         out of SSE registers, even when no operation instructions
         are available.  OImode move is available only when AVX is
         enabled.  */
      return ((TARGET_AVX && mode == OImode)
              || VALID_AVX256_REG_MODE (mode)
              || VALID_SSE_REG_MODE (mode)
              || VALID_SSE2_REG_MODE (mode)
              || VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
         so if the register is available at all, then we can move data of
         the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
         but then they do cause partial register stalls.  */
      if (regno <= BX_REG || TARGET_64BIT)
        return true;
      if (!TARGET_PARTIAL_REG_STALL)
        return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return true;
  else if (VALID_FP_MODE_P (mode))
    return true;
  else if (VALID_DFP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
            && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
            && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
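/* Illustrative consequence (a sketch): SImode and HImode are both
   tieable integer modes, so (subreg:HI (reg:SI x) 0) can share x's
   hard register with no copy; by contrast V4SFmode ties only with
   other 16-byte SSE modes, never with DFmode, since the x87 classes
   that can hold DFmode cannot hold V4SFmode.  */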
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
                bool speed)
{
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  enum machine_mode mode = GET_MODE (x);
  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
               && (!TARGET_64BIT
                   || (GET_CODE (x) != LABEL_REF
                       && (GET_CODE (x) != SYMBOL_REF
                           || !SYMBOL_REF_LOCAL_P (x)))))
        *total = 1;
      else
        *total = 0;
      return true;
    case CONST_DOUBLE:
      if (mode == VOIDmode)
        *total = 0;
      else
        switch (standard_80387_constant_p (x))
          {
          case 1: /* 0.0 */
            *total = 1;
            break;
          default: /* Other constants */
            *total = 2;
            break;
          case 0:
          case -1:
            /* Start with (MEM (SYMBOL_REF)), since that's where
               it'll probably end up.  Add a penalty for size.  */
            *total = (COSTS_N_INSNS (1)
                      + (flag_pic != 0 && !TARGET_64BIT)
                      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
            break;
          }
      return true;
    case ZERO_EXTEND:
      /* The zero extensions is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = cost->add;
      else
        *total = cost->movzx;
      return false;

    case SIGN_EXTEND:
      *total = cost->movsx;
      return false;

    case ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
          && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = cost->add;
              return false;
            }
          if ((value == 2 || value == 3)
              && cost->lea <= cost->shift_const)
            {
              *total = cost->lea;
              return false;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
        {
          if (CONST_INT_P (XEXP (x, 1)))
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = cost->shift_const + COSTS_N_INSNS (2);
              else
                *total = cost->shift_const * 2;
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = cost->shift_var * 2;
              else
                *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
            }
        }
      else
        {
          if (CONST_INT_P (XEXP (x, 1)))
            *total = cost->shift_const;
          else
            *total = cost->shift_var;
        }
      return false;
    case FMA:
      {
        rtx sub;

        gcc_assert (FLOAT_MODE_P (mode));
        gcc_assert (TARGET_FMA || TARGET_FMA4);

        /* ??? SSE scalar/vector cost should be used here.  */
        /* ??? Bald assumption that fma has the same cost as fmul.  */
        *total = cost->fmul;
        *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);

        /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
        sub = XEXP (x, 0);
        if (GET_CODE (sub) == NEG)
          sub = XEXP (sub, 0);
        *total += rtx_cost (sub, FMA, 0, speed);

        sub = XEXP (x, 2);
        if (GET_CODE (sub) == NEG)
          sub = XEXP (sub, 0);
        *total += rtx_cost (sub, FMA, 2, speed);
        return true;
      }
    case MULT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          /* ??? SSE scalar cost should be used here.  */
          *total = cost->fmul;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fmul;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          /* ??? SSE vector cost should be used here.  */
          *total = cost->fmul;
          return false;
        }
      else
        {
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          int nbits;
          if (CONST_INT_P (XEXP (x, 1)))
            {
              unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
              for (nbits = 0; value != 0; value &= value - 1)
                nbits++;
            }
          else
            /* This is arbitrary.  */
            nbits = 7;

          /* Compute costs correctly for widening multiplication.  */
          if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
              && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
                 == GET_MODE_SIZE (mode))
            {
              int is_mulwiden = 0;
              enum machine_mode inner_mode = GET_MODE (op0);

              if (GET_CODE (op0) == GET_CODE (op1))
                is_mulwiden = 1, op1 = XEXP (op1, 0);
              else if (CONST_INT_P (op1))
                {
                  if (GET_CODE (op0) == SIGN_EXTEND)
                    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
                                  == INTVAL (op1);
                  else
                    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
                }

              if (is_mulwiden)
                op0 = XEXP (op0, 0), mode = GET_MODE (op0);
            }

          *total = (cost->mult_init[MODE_INDEX (mode)]
                    + nbits * cost->mult_bit
                    + rtx_cost (op0, outer_code, opno, speed)
                    + rtx_cost (op1, outer_code, opno, speed));

          return true;
        }
    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        /* ??? SSE cost should be used here.  */
        *total = cost->fdiv;
      else if (X87_FLOAT_MODE_P (mode))
        *total = cost->fdiv;
      else if (FLOAT_MODE_P (mode))
        /* ??? SSE vector cost should be used here.  */
        *total = cost->fdiv;
      else
        *total = cost->divide[MODE_INDEX (mode)];
      return false;
    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0),
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = cost->lea;
              *total += rtx_cost (XEXP (XEXP (x, 0), 0),
                                  outer_code, opno, speed);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1),
                                  outer_code, opno, speed);
              *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
              return true;
            }
        }
      /* FALLTHRU */
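      /* Illustrative example (a sketch): for
         (plus:SI (mult:SI (reg) (const_int 4)) (reg)) the MULT branch
         above prices the whole address computation as a single
         cost->lea, matching the one "leal (%ebx,%eax,4), %ecx" it will
         become, rather than summing a shift plus an add.  */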
    case MINUS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          /* ??? SSE cost should be used here.  */
          *total = cost->fadd;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fadd;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          /* ??? SSE vector cost should be used here.  */
          *total = cost->fadd;
          return false;
        }
      /* FALLTHRU */
    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
        {
          *total = (cost->add * 2
                    + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          /* ??? SSE cost should be used here.  */
          *total = cost->fchs;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fchs;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          /* ??? SSE vector cost should be used here.  */
          *total = cost->fchs;
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
        *total = cost->add * 2;
      else
        *total = cost->add;
      return false;
    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
          && XEXP (XEXP (x, 0), 1) == const1_rtx
          && CONST_INT_P (XEXP (XEXP (x, 0), 2))
          && XEXP (x, 1) == const0_rtx)
        {
          /* This kind of construct is implemented using test[bwl].
             Treat it as if we had an AND.  */
          *total = (cost->add
                    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
                    + rtx_cost (const1_rtx, outer_code, opno, speed));
          return true;
        }
      return false;

    case FLOAT_EXTEND:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
        *total = 0;
      return false;

    case ABS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        /* ??? SSE cost should be used here.  */
        *total = cost->fabs;
      else if (X87_FLOAT_MODE_P (mode))
        *total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
        /* ??? SSE vector cost should be used here.  */
        *total = cost->fabs;
      return false;

    case SQRT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        /* ??? SSE cost should be used here.  */
        *total = cost->fsqrt;
      else if (X87_FLOAT_MODE_P (mode))
        *total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
        /* ??? SSE vector cost should be used here.  */
        *total = cost->fsqrt;
      return false;
    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
        *total = 0;
      return false;

    case VEC_SELECT:
    case VEC_CONCAT:
    case VEC_MERGE:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
         recognizable.  In which case they all pretty much have the
         same cost.  */
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}

#if TARGET_MACHO

static int current_machopic_label_num;
;
31757 /* Given a symbol name and its associated stub, write out the
31758 definition of the stub. */
31761 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
31763 unsigned int length
;
31764 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
31765 int label
= ++current_machopic_label_num
;
31767 /* For 64-bit we shouldn't get here. */
31768 gcc_assert (!TARGET_64BIT
);
31770 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
31771 symb
= targetm
.strip_name_encoding (symb
);
31773 length
= strlen (stub
);
31774 binder_name
= XALLOCAVEC (char, length
+ 32);
31775 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
31777 length
= strlen (symb
);
31778 symbol_name
= XALLOCAVEC (char, length
+ 32);
31779 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
31781 sprintf (lazy_ptr_name
, "L%d$lz", label
);
31783 if (MACHOPIC_ATT_STUB
)
31784 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
31785 else if (MACHOPIC_PURE
)
31786 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
31788 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
31790 fprintf (file
, "%s:\n", stub
);
31791 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
31793 if (MACHOPIC_ATT_STUB
)
31795 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
31797 else if (MACHOPIC_PURE
)
31800 /* 25-byte PIC stub using "CALL get_pc_thunk". */
31801 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
31802 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
31803 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
31804 label
, lazy_ptr_name
, label
);
31805 fprintf (file
, "\tjmp\t*%%ecx\n");
31808 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
31810 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
31811 it needs no stub-binding-helper. */
31812 if (MACHOPIC_ATT_STUB
)
31815 fprintf (file
, "%s:\n", binder_name
);
31819 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
31820 fprintf (file
, "\tpushl\t%%ecx\n");
31823 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
31825 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
31827 /* N.B. Keep the correspondence of these
31828 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
31829 old-pic/new-pic/non-pic stubs; altering this will break
31830 compatibility with existing dylibs. */
31833 /* 25-byte PIC stub using "CALL get_pc_thunk". */
31834 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
31837 /* 16-byte -mdynamic-no-pic stub. */
31838 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
31840 fprintf (file
, "%s:\n", lazy_ptr_name
);
31841 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
31842 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
31844 #endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
        reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
        reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
/* Handle a "callee_pop_aggregate_return" attribute; arguments as
   in struct attribute_spec handler.  */
static tree
ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
                                         tree args,
                                         int flags ATTRIBUTE_UNUSED,
                                         bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
               name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (is_attribute_p ("callee_pop_aggregate_return", name))
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
        {
          warning (OPT_Wattributes,
                   "%qE attribute requires an integer constant argument",
                   name);
          *no_add_attrs = true;
        }
      else if (compare_tree_int (cst, 0) != 0
               && compare_tree_int (cst, 1) != 0)
        {
          warning (OPT_Wattributes,
                   "argument to %qE attribute is neither zero, nor one",
                   name);
          *no_add_attrs = true;
        }

      return NULL_TREE;
    }

  return NULL_TREE;
}
/* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_abi_attribute (tree *node, tree name,
                           tree args ATTRIBUTE_UNUSED,
                           int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("ms_abi", name))
    {
      if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
        {
          error ("ms_abi and sysv_abi attributes are not compatible");
        }

      return NULL_TREE;
    }
  else if (is_attribute_p ("sysv_abi", name))
    {
      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
        {
          error ("ms_abi and sysv_abi attributes are not compatible");
        }

      return NULL_TREE;
    }

  return NULL_TREE;
}
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
		 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
    }
  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
static tree
ix86_handle_fndecl_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
}
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset, const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, R10_REG);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;
      unsigned int tmp_regno;

      if (TARGET_64BIT)
	tmp_regno = R10_REG;
      else
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	    tmp_regno = AX_REG;
	  else
	    tmp_regno = CX_REG;
	}
      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || cfun->machine->call_abi == MS_ABI)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    emit_jump_insn (gen_indirect_jump (fnaddr));
  else
    {
      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.
     Note that use_thunk calls assemble_start_function et al.  */
  tmp = get_insns ();
  insn_locators_alloc ();
  shorten_branches (tmp);
  final_start_function (tmp, file, 1);
  final (tmp, file, 1);
  final_end_function ();
}
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */

void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
					 : MCOUNT_NAME);

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (DEFAULT_ABI == SYSV_ABI && flag_pic)
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
	fprintf (file, "\tcall\t%s\n", mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t%s\n", mcount_name);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (JUMP_TABLE_DATA_P (insn))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start)
		       && GET_CODE (PATTERN (start)) != ADDR_VEC
		       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
		      || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn)
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start)
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
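#if 0
/* Illustrative sketch, not part of the compiler: the sliding-window
   bookkeeping above, restated over plain (size, is_jump) records.
   The record type and driver below are hypothetical; the invariant is
   the same one the pass maintains: once a fourth jump (or call) lands
   in a window whose estimated size fits in one 16-byte page, padding
   is called for before the current instruction.  */
struct insn_rec { int size; int is_jump; };

static int
count_pad_points (const struct insn_rec *insns, int n)
{
  int start = 0, nbytes = 0, njumps = 0, pads = 0, i;

  for (i = 0; i < n; i++)
    {
      int isjump = 0;

      nbytes += insns[i].size;
      if (!insns[i].is_jump)
	continue;
      njumps++;

      /* Shrink the window from the left until at most 3 jumps remain,
	 remembering whether the last record dropped was a jump.  */
      while (njumps > 3)
	{
	  isjump = insns[start].is_jump;
	  njumps -= isjump;
	  nbytes -= insns[start].size;
	  start++;
	}

      /* Three jumps left, a jump just fell off the left edge, and the
	 window can share a 16-byte page: this is where gen_pad goes.  */
      if (njumps == 3 && isjump && nbytes < 16)
	pads++;
    }
  return pads;
}
#endif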
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */

static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Only happen in exit blocks.  */
      if (JUMP_P (insn)
	  && ANY_RETURN_P (PATTERN (insn)))
	break;

      if (NONDEBUG_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER)
	{
	  insn_count++;
	  if (insn_count >= 4)
	    return insn_count;
	}
    }

  return insn_count;
}
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR)
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR)
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */

static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* Run the vzeroupper optimization if needed.  */
  if (TARGET_VZEROUPPER)
    move_or_delete_vzeroupper ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */

bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) > BX_REG)
      return true;
  return false;
}
/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */

static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}
/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */

bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
		       extended_reg_mentioned_1, NULL);
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */

bool
x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case SImode:
    case HImode:
    case QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
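#if 0
/* Illustrative sketch, not part of the compiler: the encoding fact
   behind the -128 exception above.  Sign-extended 8-bit immediates
   cover [-128, 127], so negating a negative constant ("addl $-4" ->
   "subl $4") normally costs nothing -- except for -128, whose
   negation +128 no longer fits in one byte.  The helper below is
   hypothetical.  */
static int
negation_keeps_imm8 (int val)
{
  /* -(-128) = 128 would need a 4-byte immediate.  */
  return val >= -127 && val <= 127;
}
#endif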
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
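#if 0
/* Illustrative sketch, not part of the compiler: the same algorithm
   in plain C for SImode inputs.  When the sign bit is set, halve the
   value, OR the shifted-out bit back into bit 0 (a "sticky" bit) so
   the final rounding stays correct, convert signed, then double.  */
static float
floatuns_sketch (unsigned int in)
{
  unsigned int i0, i1;
  float f0;

  if ((int) in >= 0)
    return (float) (int) in;		/* value fits the signed range */

  i0 = in >> 1;				/* halve */
  i1 = in & 1;				/* the bit the shift discarded */
  f0 = (float) (int) (i0 | i1);		/* sticky bit preserves rounding */
  return f0 + f0;			/* double back to full magnitude */
}
#endif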
/* AVX2 does support 32-byte integer vector operations,
   thus the longest vector we are faced with is V32QImode.  */
#define MAX_VECT_LEN	32

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool testing_p;
};

static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline enum machine_mode
get_mode_wider_vector (enum machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  enum machine_mode n = GET_MODE_WIDER_MODE (o);
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  bool ok;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V4DFmode:
    case V4DImode:
    case V8SFmode:
    case V8SImode:
    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      {
	rtx insn, dup;

	/* First attempt to recognize VAL as-is.  */
	dup = gen_rtx_VEC_DUPLICATE (mode, val);
	insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
	if (recog_memoized (insn) < 0)
	  {
	    rtx seq;
	    /* If that fails, force VAL into a register.  */

	    start_sequence ();
	    XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
	    seq = get_insns ();
	    end_sequence ();
	    if (seq)
	      emit_insn_before (seq, insn);

	    ok = recog_memoized (insn) >= 0;
	    gcc_assert (ok);
	  }
      }
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  rtx x;

	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      goto widen;

    case V8QImode:
      if (!mmx_ok)
	return false;
      goto widen;

    case V8HImode:
      if (TARGET_SSE2)
	{
	  struct expand_vec_perm_d dperm;
	  rtx tmp1, tmp2;

	permute:
	  memset (&dperm, 0, sizeof (dperm));
	  dperm.target = target;
	  dperm.vmode = mode;
	  dperm.nelt = GET_MODE_NUNITS (mode);
	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);

	  /* Extend to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));

	  /* Insert the SImode value as low element of a V4SImode vector.  */
	  tmp2 = gen_lowpart (V4SImode, dperm.op0);
	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));

	  ok = (expand_vec_perm_1 (&dperm)
		|| expand_vec_perm_broadcast_1 (&dperm));
	  gcc_assert (ok);
	  return ok;
	}
      goto widen;

    case V16QImode:
      if (TARGET_SSE2)
	goto permute;
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      {
	enum machine_mode smode, wsmode, wvmode;
	rtx x;

	smode = GET_MODE_INNER (mode);
	wvmode = get_mode_wider_vector (mode);
	wsmode = GET_MODE_INNER (wvmode);

	val = convert_modes (wsmode, smode, val, true);
	x = expand_simple_binop (wsmode, ASHIFT, val,
				 GEN_INT (GET_MODE_BITSIZE (smode)),
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

	x = gen_lowpart (wvmode, target);
	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
	gcc_assert (ok);
	return ok;
      }

    case V16HImode:
    case V32QImode:
      {
	enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
	rtx x = gen_reg_rtx (hvmode);

	ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
	gcc_assert (ok);

	x = gen_rtx_VEC_CONCAT (mode, x, x);
	emit_insn (gen_rtx_SET (VOIDmode, target, x));
      }
      return true;

    default:
      return false;
    }
}
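#if 0
/* Illustrative sketch, not part of the compiler: the "widen" path
   above, restated as integer bit tricks.  Broadcasting a byte through
   a wider integer by shift-and-OR is exactly the recursion the
   expander performs one vector-mode step at a time.  */
static unsigned int
broadcast_byte (unsigned char b)
{
  unsigned int v = b;
  v |= v << 8;				/* QImode pair -> HImode element */
  v |= v << 16;				/* HImode pair -> SImode element */
  return v;				/* b replicated four times */
}
#endif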
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  switch (mode)
    {
    case V2DImode:
      /* For SSE4.1, we normally use vector set.  But if the second
	 element is zero and inter-unit moves are OK, we use movq
	 instead.  */
      use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
			&& !(TARGET_INTER_UNIT_MOVES
			     && one_var == 0));
      break;
    case V16QImode:
    case V4SImode:
    case V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V8SFmode:
    case V4DFmode:
      use_vector_set = TARGET_AVX;
      break;
    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      use_vector_set = TARGET_AVX && TARGET_64BIT;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    const1_rtx,
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
					  const1_rtx,
					  GEN_INT (one_var == 1 ? 0 : 1),
					  GEN_INT (one_var == 2 ? 0+4 : 1+4),
					  GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      if (!TARGET_64BIT)
	return false;
    case V4DFmode:
    case V8SFmode:
    case V8SImode:
    case V16HImode:
    case V32QImode:
    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      if (TARGET_SSE4_1)
	break;
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
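#if 0
/* Illustrative sketch, not part of the compiler: the QImode promotion
   above in plain C.  The variable byte is fused with its neighbouring
   constant byte into one little-endian 16-bit element, so a single
   HImode vector set suffices.  The helper is hypothetical.  */
static unsigned short
fuse_byte_pair (unsigned char var, unsigned char neighbor, int one_var)
{
  if (one_var & 1)
    /* Variable byte is the odd one: it occupies the high half.  */
    return (unsigned short) ((var << 8) | neighbor);
  else
    /* Variable byte is the even one: it occupies the low half.  */
    return (unsigned short) ((neighbor << 8) | var);
}
#endif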
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenate to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (enum machine_mode mode,
				rtx target, rtx *ops, int n)
{
  enum machine_mode cmode, hmode = VOIDmode;
  rtx first[8], second[4];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      switch (mode)
	{
	case V8SImode:
	  cmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V4SFmode;
	  break;
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	case V2DImode:
	  cmode = DImode;
	  break;
	case V2SImode:
	  cmode = SImode;
	  break;
	case V2DFmode:
	  cmode = DFmode;
	  break;
	case V2SFmode:
	  cmode = SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (!register_operand (ops[1], cmode))
	ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
	ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, ops[0],
						  ops[1])));
      break;

    case 4:
      switch (mode)
	{
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

    case 8:
      switch (mode)
	{
	case V8SImode:
	  cmode = V2SImode;
	  hmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V2SFmode;
	  hmode = V4SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

half:
      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
	{
	  first[j] = gen_reg_rtx (cmode);
	  v = gen_rtvec (2, ops[i - 1], ops[i]);
	  ix86_expand_vector_init (false, first[j],
				   gen_rtx_PARALLEL (cmode, v));
	}

      n >>= 1;
      if (n > 2)
	{
	  gcc_assert (hmode != VOIDmode);
	  for (i = j = 0; i < n; i += 2, j++)
	    {
	      second[j] = gen_reg_rtx (hmode);
	      ix86_expand_vector_init_concat (hmode, second[j],
					      &first[i], 2);
	    }
	  n >>= 1;
	  ix86_expand_vector_init_concat (mode, target, second, n);
	}
      else
	ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_interleave (enum machine_mode mode,
				    rtx target, rtx *ops, int n)
{
  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  switch (mode)
    {
    case V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      inner_mode = HImode;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      inner_mode = QImode;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops[i + i]));

      /* Insert the SImode value as low element of V4SImode vector.  */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
			       gen_rtx_VEC_DUPLICATE (V4SImode,
						      op0),
			       CONST0_RTX (V4SImode),
			       const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn (gen_load_even (op0,
				force_reg (inner_mode,
					   ops[i + i + 1]),
				const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  */
  switch (second_imode)
    {
    case V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
	{
	  op0 = gen_reg_rtx (second_imode);
	  emit_insn (gen_interleave_second_low (op0, ops[i],
						ops[i + 1]));

	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
	     vector.  */
	  ops[j] = gen_reg_rtx (third_imode);
	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
	}
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn (gen_interleave_second_low (op0, ops[0],
					    ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector on original
	 mode.  */
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  rtx ops[32], op0, op1;
  enum machine_mode half_mode = VOIDmode;
  int n, i;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V4DFmode:
    case V4DImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case V32QImode:
      half_mode = V16QImode;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      goto half;

half:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (half_mode);
      op1 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (half_mode, op0, ops,
					  n >> 2);
      ix86_expand_vector_init_interleave (half_mode, op1,
					  &ops[n >> 1], n >> 2);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
      return;

    case V16QImode:
      if (!TARGET_SSE4_1)
	break;
      /* FALLTHRU */

    case V8HImode:
      if (!TARGET_SSE2)
	break;

      /* Don't use ix86_expand_vector_init_interleave if we can't
	 move from GPR to SSE register directly.  */
      if (!TARGET_INTER_UNIT_MOVES)
	break;

      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  rtx tmp = gen_reg_rtx (mode);
	  emit_clobber (tmp);
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  gcc_assert (word_mode == SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
	    || GET_CODE (x) == CONST_DOUBLE
	    || GET_CODE (x) == CONST_FIXED))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  enum machine_mode half_mode;
  bool use_vec_merge = false;
  rtx tmp;
  static rtx (*gen_extract[6][2]) (rtx, rtx)
    = {
	{ gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
	{ gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
	{ gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
	{ gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
	{ gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
	{ gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
      };
  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
    = {
	{ gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
	{ gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
	{ gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
	{ gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
	{ gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
	{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
      };
  int i, j, n;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
      if (use_vec_merge)
	break;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
      if (elt == 0)
	tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      else
	tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      return;

    case V2DFmode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D  */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const1_rtx, const0_rtx,
					  GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */

	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V8QImode:
      break;

    case V32QImode:
      half_mode = V16QImode;
      j = 0;
      n = 16;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      j = 1;
      n = 8;
      goto half;

    case V8SImode:
      half_mode = V4SImode;
      j = 2;
      n = 4;
      goto half;

    case V4DImode:
      half_mode = V2DImode;
      j = 3;
      n = 2;
      goto half;

    case V8SFmode:
      half_mode = V4SFmode;
      j = 4;
      n = 4;
      goto half;

    case V4DFmode:
      half_mode = V2DFmode;
      j = 5;
      n = 2;
      goto half;

half:
      /* Compute offset.  */
      i = elt / n;
      elt %= n;

      gcc_assert (i <= 1);

      /* Extract the half.  */
      tmp = gen_reg_rtx (half_mode);
      emit_insn (gen_extract[j][i] (tmp, target));

      /* Put val in tmp at elt.  */
      ix86_expand_vector_set (false, tmp, val, elt);

      /* Put it back.  */
      emit_insn (gen_insert[j][i] (target, target, tmp));
      return;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
					  GEN_INT (elt), GEN_INT (elt),
					  GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8SFmode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V4SFmode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
	  return;
	}
      break;

    case V4DFmode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V2DFmode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
	  return;
	}
      break;

    case V32QImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V16QImode);
	  if (elt < 16)
	    emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
	  return;
	}
      break;

    case V16HImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V8HImode);
	  if (elt < 8)
	    emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 7);
	  return;
	}
      break;

    case V8SImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V4SImode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
	  return;
	}
      break;

    case V4DImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V2DImode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
	  return;
	}
      break;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
   to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
   The upper bits of DEST are undefined, though they shouldn't cause
   exceptions (some bits from src or all zeros are ok).  */

static void
emit_reduc_half (rtx dest, rtx src, int i)
{
  rtx tem;
  switch (GET_MODE (src))
    {
    case V4SFmode:
      if (i == 128)
	tem = gen_sse_movhlps (dest, src, src);
      else
	tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
				   GEN_INT (1 + 4), GEN_INT (1 + 4));
      break;
    case V2DFmode:
      tem = gen_vec_interleave_highv2df (dest, src, src);
      break;
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
				gen_lowpart (V1TImode, src),
				GEN_INT (i / 2));
      break;
    case V8SFmode:
      if (i == 256)
	tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
      else
	tem = gen_avx_shufps256 (dest, src, src,
				 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
      break;
    case V4DFmode:
      if (i == 256)
	tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
      else
	tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      if (i == 256)
	tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
				 gen_lowpart (V4DImode, src),
				 gen_lowpart (V4DImode, src),
				 const1_rtx);
      else
	tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
				  gen_lowpart (V2TImode, src),
				  GEN_INT (i / 2));
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (tem);
}
/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx half, dst, vec = in;
  enum machine_mode mode = GET_MODE (in);
  int i;

  /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
  if (TARGET_SSE4_1
      && mode == V8HImode
      && fn == gen_uminv8hi3)
    {
      emit_insn (gen_sse4_1_phminposuw (dest, in));
      return;
    }

  for (i = GET_MODE_BITSIZE (mode);
       i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
       i >>= 1)
    {
      half = gen_reg_rtx (mode);
      emit_reduc_half (half, vec, i);
      if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
	dst = dest;
      else
	dst = gen_reg_rtx (mode);
      emit_insn (fn (dst, half, vec));
      vec = dst;
    }
}
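#if 0
/* Illustrative sketch, not part of the compiler: the log2(nelt)
   reduction loop above, over an ordinary array.  Each round folds the
   upper half of the active lanes into the lower half; after the last
   round only lane 0 is meaningful, mirroring emit_reduc_half.  The
   "max" operation stands in for the FN pattern.  */
static int
reduc_max_sketch (int *lanes, int nelt)
{
  int width, i;

  for (width = nelt / 2; width >= 1; width /= 2)
    for (i = 0; i < width; i++)
      if (lanes[i + width] > lanes[i])
	lanes[i] = lanes[i + width];	/* fn (dst, half, vec) analogue */

  return lanes[0];
}
#endif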
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  return clobbers;
}
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
	  else
	    return "fst%Z0\t%y0";
	}
    }
  else
    gcc_unreachable ();
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);
  rtx test;

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
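#if 0
/* Illustrative sketch, not part of the compiler (assumes <math.h> and
   <stdio.h>): why the 0.29289... = 1 - sqrt(2)/2 cutoff above exists.
   Forming 1 + x explicitly absorbs a tiny x into 1.0 before the
   logarithm is taken; fyl2xp1 avoids that by keeping x as a separate
   operand, which is what log1p models here.  */
#include <math.h>
#include <stdio.h>

static void
log1p_cancellation_demo (void)
{
  double x = 1e-17;

  /* 1.0 + x rounds to 1.0, so the naive form loses everything.  */
  printf ("log (1 + x) = %g\n", log (1.0 + x));
  /* log1p keeps the leading x - x*x/2 + ... terms.  */
  printf ("log1p (x)   = %g\n", log1p (x));
}
#endif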
/* Emit code for round calculation.  */
void ix86_emit_i387_round (rtx op0, rtx op1)
{
  enum machine_mode inmode = GET_MODE (op1);
  enum machine_mode outmode = GET_MODE (op0);
  rtx e1, e2, res, tmp, tmp1, half;
  rtx scratch = gen_reg_rtx (HImode);
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx jump_label = gen_label_rtx ();
  rtx insn;
  rtx (*gen_abs) (rtx, rtx);
  rtx (*gen_neg) (rtx, rtx);

  switch (inmode)
    {
    case SFmode:
      gen_abs = gen_abssf2;
      break;
    case DFmode:
      gen_abs = gen_absdf2;
      break;
    case XFmode:
      gen_abs = gen_absxf2;
      break;
    default:
      gcc_unreachable ();
    }

  switch (outmode)
    {
    case SFmode:
      gen_neg = gen_negsf2;
      break;
    case DFmode:
      gen_neg = gen_negdf2;
      break;
    case XFmode:
      gen_neg = gen_negxf2;
      break;
    case HImode:
      gen_neg = gen_neghi2;
      break;
    case SImode:
      gen_neg = gen_negsi2;
      break;
    case DImode:
      gen_neg = gen_negdi2;
      break;
    default:
      gcc_unreachable ();
    }

  e1 = gen_reg_rtx (inmode);
  e2 = gen_reg_rtx (inmode);
  res = gen_reg_rtx (outmode);

  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);

  /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */

  /* scratch = fxam(op1) */
  emit_insn (gen_rtx_SET (VOIDmode, scratch,
			  gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
					  UNSPEC_FXAM)));
  /* e1 = fabs(op1) */
  emit_insn (gen_abs (e1, op1));

  /* e2 = e1 + 0.5 */
  half = force_reg (inmode, half);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (inmode, e1, half)));

  /* res = floor(e2) */
  if (inmode != XFmode)
    {
      tmp1 = gen_reg_rtx (XFmode);

      emit_insn (gen_rtx_SET (VOIDmode, tmp1,
			      gen_rtx_FLOAT_EXTEND (XFmode, e2)));
    }
  else
    tmp1 = e2;

  switch (outmode)
    {
    case SFmode:
    case DFmode:
      {
	rtx tmp0 = gen_reg_rtx (XFmode);

	emit_insn (gen_frndintxf2_floor (tmp0, tmp1));

	emit_insn (gen_rtx_SET (VOIDmode, res,
				gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
						UNSPEC_TRUNC_NOOP)));
      }
      break;
    case XFmode:
      emit_insn (gen_frndintxf2_floor (res, tmp1));
      break;
    case HImode:
      emit_insn (gen_lfloorxfhi2 (res, tmp1));
      break;
    case SImode:
      emit_insn (gen_lfloorxfsi2 (res, tmp1));
      break;
    case DImode:
      emit_insn (gen_lfloorxfdi2 (res, tmp1));
      break;
    default:
      gcc_unreachable ();
    }

  /* flags = signbit(a) */
  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));

  /* if (flags) then res = -res */
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
			      gen_rtx_EQ (VOIDmode, flags, const0_rtx),
			      gen_rtx_LABEL_REF (VOIDmode, jump_label),
			      pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = jump_label;

  emit_insn (gen_neg (res, res));

  emit_label (jump_label);
  LABEL_NUSES (jump_label) = 1;

  emit_move_insn (op0, res);
}
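#if 0
/* Illustrative sketch, not part of the compiler: the identity
   round(a) = sgn(a) * floor(fabs(a) + 0.5) used above, in plain C.
   The cast to long stands in for the x87 floor; it is exact here
   because the operand is nonnegative (truncation equals floor).  */
static double
round_sketch (double a)
{
  double e1 = a < 0.0 ? -a : a;		/* e1 = fabs (a) */
  double e2 = e1 + 0.5;			/* e2 = e1 + 0.5 */
  double res = (double) (long) e2;	/* res = floor (e2) */
  return a < 0.0 ? -res : res;		/* negate if the sign bit was set */
}
#endif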
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */

  b = force_reg (mode, b);

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
					  UNSPEC_RCP)));
  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, b)));

  /* e0 = x0 * e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, e0)));

  /* e1 = x0 + x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_PLUS (mode, x0, x0)));

  /* x1 = e1 - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
			  gen_rtx_MINUS (mode, e1, e0)));

  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, a, x1)));
}
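#if 0
/* Illustrative sketch, not part of the compiler: the Newton-Raphson
   step above in scalar form.  X0 is the ~12-bit rcpss estimate (the
   exact division below merely stands in for it); one iteration
   x1 = 2*x0 - b*x0*x0 roughly doubles the number of correct bits.  */
static float
swdiv_sketch (float a, float b)
{
  float x0 = 1.0f / b;			/* placeholder for the rcp estimate */
  float e0 = (x0 * b) * x0;		/* e0 = b * x0 * x0 */
  float e1 = x0 + x0;			/* e1 = x0 + x0 */
  float x1 = e1 - e0;			/* refined reciprocal of b */
  return a * x1;			/* a / b ~= a * x1 */
}
#endif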
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
			 bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
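  /* Both forms are one Newton-Raphson step for 1/sqrt(a) applied to the
     rsqrtss estimate x0: x1 = x0 * (3 - a*x0*x0) / 2.  The -0.5 factor
     is carried in e3 and the (a*x0*x0 - 3) term in e2, so res = e2 * e3
     gives the refined value (additionally scaled by a for sqrt).  */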
  a = force_reg (mode, a);

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
					  UNSPEC_RSQRT)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX (mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
			      gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
			      gen_rtx_AND (mode, x0, mask)));
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, e0, mhalf)));

  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, e2, e3)));
}
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit.  */

static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      enum machine_mode vmode;

      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
      else
	vmode = mode;

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
	{
	  /* We need to generate a scalar mode mask in this case.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
	  mask = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
	}
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
			  gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
			  gen_rtx_IOR (mode, abs_value, sgn)));
}
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */

static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode vmode, mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;
  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
			  gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */

static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
				  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */

static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
			      bool swap_operands)
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;

  emit_insn (insn (mask, op0, op1,
		   gen_rtx_fmt_ee (code, mode, op0, op1)));
  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */

static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */

void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
	tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
	return (long)tmp;
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
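  /* pred_half = 0.5 - 2**(-p-1), i.e. the largest representable value
     strictly below 0.5 for a p-bit mantissa.  Using it instead of 0.5
     keeps an operand one ulp below 0.5 from being rounded up when the
     addition itself rounds.  */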
  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
   storing into OPERAND0.  */

void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	return xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
					    freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */

void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
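  /* xa = xa + TWO52 - TWO52; since xa is known to be below 2**52 here,
     the addition pushes any fractional bits out of the mantissa, and
     the subtraction leaves xa rounded to an integer in the current
     rounding mode.  */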
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */

void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
		   const_double_from_real_value (do_floor
						 ? dconst1 : dconstm1, mode));
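  /* For ceil (!do_floor) the comparison operands below are swapped, so
     the mask is set when operand1 > x2, and ONE is -1.0; subtracting
     -1.0 then adds 1 exactly in that case.  */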
  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */

void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */

void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
     Compensate.
	dxa = xa2 - xa;
	if (dxa <= -0.5)
	  xa2 += 1;
	else if (dxa > 0.5)
	  xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
			       0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */

void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */

void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
     Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */

void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round
   from OP1 storing into OP0 using sse4 round insn.  */

void
ix86_expand_round_sse4 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx e1, e2, res, half;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx (*gen_copysign) (rtx, rtx, rtx);
  rtx (*gen_round) (rtx, rtx, rtx);

  switch (mode)
    {
    case SFmode:
      gen_copysign = gen_copysignsf3;
      gen_round = gen_sse4_1_roundsf2;
      break;
    case DFmode:
      gen_copysign = gen_copysigndf3;
      gen_round = gen_sse4_1_rounddf2;
      break;
    default:
      gcc_unreachable ();
    }

  /* round (a) = trunc (a + copysign (0.5, a)) */
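  /* Rounding toward zero makes one sequence valid for both signs:
     copysign moves the addend toward the operand's own sign, so the sum
     crosses the next integer in that direction and ROUND_TRUNC then
     chops back toward zero.  */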
  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
  half = const_double_from_real_value (pred_half, mode);

  /* e1 = copysign (0.5, op1) */
  e1 = gen_reg_rtx (mode);
  emit_insn (gen_copysign (e1, half, op1));

  /* e2 = op1 + e1 */
  e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = trunc (e2) */
  res = gen_reg_rtx (mode);
  emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));

  emit_move_insn (op0, res);
}
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
    true },
  /* The transactional memory builtins are implicitly regparm or fastcall
     depending on the ABI.  Override the generic do-nothing attribute that
     these builtins were declared with.  */
  { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
    true },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute, false },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
    false },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
    false },
  { "callee_pop_aggregate_return", 1, 1, false, true, true,
    ix86_handle_callee_pop_aggregate_return, true },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				 tree vectype ATTRIBUTE_UNUSED,
				 int misalign ATTRIBUTE_UNUSED)
{
  switch (type_of_cost)
    {
      case scalar_stmt:
	return ix86_cost->scalar_stmt_cost;

      case scalar_load:
	return ix86_cost->scalar_load_cost;

      case scalar_store:
	return ix86_cost->scalar_store_cost;

      case vector_stmt:
	return ix86_cost->vec_stmt_cost;

      case vector_load:
	return ix86_cost->vec_align_load_cost;

      case vector_store:
	return ix86_cost->vec_store_cost;

      case vec_to_scalar:
	return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
	return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
	return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
	return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
	return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
	return ix86_cost->vec_stmt_cost;

      default:
	gcc_unreachable ();
    }
}
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
{
  rtx rperm[MAX_VECT_LEN], x;
  unsigned i;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (perm[i]);

  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
  x = gen_rtx_SET (VOIDmode, target, x);

  x = emit_insn (x);
  if (recog_memoized (x) < 0)
    {
      remove_insn (x);
      return false;
    }
  return true;
}
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
			const unsigned char *perm, unsigned nelt)
{
  enum machine_mode v2mode;
  rtx x;

  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
  return expand_vselect (target, x, perm, nelt);
}
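/* For example, on V4SImode operands a call to expand_vselect_vconcat
   with perm = { 0, 4, 1, 5 } forms the V8SImode concatenation of op0
   and op1 and selects the interleave-low element order, which recog
   matches as punpckldq.  */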
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;
  rtx rperm[32], vperm;

  if (d->op0 == d->op1)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
	return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */
  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
    case V2DFmode:
    case V4SFmode:
    case V8HImode:
    case V8SImode:
      for (i = 0; i < nelt; ++i)
	mask |= (d->perm[i] >= nelt) << i;
      break;

    case V2DImode:
      for (i = 0; i < 2; ++i)
	mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      vmode = V8HImode;
      goto do_subreg;

    case V4SImode:
      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8HImode;
      goto do_subreg;

    case V16QImode:
      /* See if bytes move in pairs so we can use pblendw with
	 an immediate argument, rather than pblendvb with a vector
	 argument.  */
      for (i = 0; i < 16; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  {
	  use_pblendvb:
	    for (i = 0; i < nelt; ++i)
	      rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

	  finish_pblendvb:
	    vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
	    vperm = force_reg (vmode, vperm);

	    if (GET_MODE_SIZE (vmode) == 16)
	      emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
	    else
	      emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
	    return true;
	  }

      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8HImode;
      /* FALLTHRU */

    do_subreg:
      target = gen_lowpart (vmode, target);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    case V32QImode:
      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
      for (i = 0; i < 32; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  goto use_pblendvb;
      /* See if bytes move in quadruplets.  If yes, vpblendd
	 with immediate can be used.  */
      for (i = 0; i < 32; i += 4)
	if (d->perm[i] + 2 != d->perm[i + 2])
	  break;
      if (i < 32)
	{
	  /* See if bytes move the same in both lanes.  If yes,
	     vpblendw with immediate can be used.  */
	  for (i = 0; i < 16; i += 2)
	    if (d->perm[i] + 16 != d->perm[i + 16])
	      goto use_pblendvb;

	  /* Use vpblendw.  */
	  for (i = 0; i < 16; ++i)
	    mask |= (d->perm[i * 2] >= 32) << i;
	  vmode = V16HImode;
	  goto do_subreg;
	}

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 4] >= 32) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V16HImode:
      /* See if words move in pairs.  If yes, vpblendd can be used.  */
      for (i = 0; i < 16; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  break;
      if (i < 16)
	{
	  /* See if words move the same in both lanes.  If not,
	     vpblendvb must be used.  */
	  for (i = 0; i < 8; i++)
	    if (d->perm[i] + 8 != d->perm[i + 8])
	      {
		/* Use vpblendvb.  */
		for (i = 0; i < 32; ++i)
		  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);

		vmode = V32QImode;
		nelt = 32;
		target = gen_lowpart (vmode, target);
		op0 = gen_lowpart (vmode, op0);
		op1 = gen_lowpart (vmode, op1);
		goto finish_pblendvb;
	      }

	  /* Use vpblendw.  */
	  for (i = 0; i < 16; ++i)
	    mask |= (d->perm[i] >= 16) << i;
	  break;
	}

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V4DImode:
      /* Use vpblendd.  */
      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8SImode;
      goto do_subreg;

    default:
      gcc_unreachable ();
    }

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
  emit_insn (x);

  return true;
}
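/* For example, in expand_vec_perm_blend a V8HImode permutation
   { 0, 9, 2, 11, 4, 13, 6, 15 } keeps every element in its lane (each
   index is i or i + 8) and produces mask 0xaa, i.e. a pblendw taking
   the odd words from the second operand.  */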
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
	return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
	 from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
	e -= (8 + 4 - 4);
      else if (e >= 4)
	e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}
/* Return true if permutation D can be performed as VMODE permutation
   instead.  */

static bool
valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
{
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
    return false;

  if (GET_MODE_NUNITS (vmode) >= d->nelt)
    return true;

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
	if (d->perm[i] + j != d->perm[i + j])
	  return false;

  return true;
}
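/* For example, a V16QImode permutation starting { 4, 5, 6, 7, 0, 1, 2,
   3, ... } is valid as a V4SImode permutation (chunk = 4): each group
   of four byte indexes is 4-aligned and consecutive, so it reduces to
   moving whole 32-bit words.  */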
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[32];
  enum machine_mode vmode = V16QImode;
  rtx rperm[32], vperm, target, op0, op1;

  nelt = d->nelt;

  if (d->op0 != d->op1)
    {
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
	{
	  if (TARGET_AVX2
	      && valid_perm_using_mode_p (V2TImode, d))
	    {
	      if (d->testing_p)
		return true;

	      /* Use vperm2i128 insn.  The pattern uses
		 V4DImode instead of V2TImode.  */
	      target = gen_lowpart (V4DImode, d->target);
	      op0 = gen_lowpart (V4DImode, d->op0);
	      op1 = gen_lowpart (V4DImode, d->op1);
	      rperm[0]
		= GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
			   | ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
	      emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
	      return true;
	    }
	  return false;
	}
    }
  else
    {
      if (GET_MODE_SIZE (d->vmode) == 16)
	{
	  if (!TARGET_SSSE3)
	    return false;
	}
      else if (GET_MODE_SIZE (d->vmode) == 32)
	{
	  if (!TARGET_AVX2)
	    return false;

	  /* V4DImode should be already handled through
	     expand_vselect by vpermq instruction.  */
	  gcc_assert (d->vmode != V4DImode);

	  vmode = V32QImode;
	  if (d->vmode == V8SImode
	      || d->vmode == V16HImode
	      || d->vmode == V32QImode)
	    {
	      /* First see if vpermq can be used for
		 V8SImode/V16HImode/V32QImode.  */
	      if (valid_perm_using_mode_p (V4DImode, d))
		{
		  for (i = 0; i < 4; i++)
		    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
		  if (d->testing_p)
		    return true;
		  return expand_vselect (gen_lowpart (V4DImode, d->target),
					 gen_lowpart (V4DImode, d->op0),
					 perm, 4);
		}

	      /* Next see if vpermd can be used.  */
	      if (valid_perm_using_mode_p (V8SImode, d))
		vmode = V8SImode;
	    }

	  if (vmode == V32QImode)
	    {
	      /* vpshufb only works intra lanes, it is not
		 possible to shuffle bytes in between the lanes.  */
	      for (i = 0; i < nelt; ++i)
		if ((d->perm[i] ^ i) & (nelt / 2))
		  return false;
	    }
	}
      else
	return false;
    }

  if (d->testing_p)
    return true;

  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
  else
    {
      eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
      if (d->op0 != d->op1)
	mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
	mask = nelt - 1;
      else
	mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
	{
	  unsigned j, e = d->perm[i] & mask;
	  for (j = 0; j < eltsz; ++j)
	    rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
	}
    }

  vperm = gen_rtx_CONST_VECTOR (vmode,
				gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (vmode, vperm);

  target = gen_lowpart (vmode, d->target);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->op0 == d->op1)
    {
      if (vmode == V16QImode)
	emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
	emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else
	emit_insn (gen_avx2_permvarv8si (target, vperm, op0));
    }
  else
    {
      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->op0 == d->op1)
    {
      int mask = nelt - 1;
      bool identity_perm = true;
      bool broadcast_perm = true;

      for (i = 0; i < nelt; i++)
	{
	  perm2[i] = d->perm[i] & mask;
	  if (perm2[i] != i)
	    identity_perm = false;
	  if (perm2[i])
	    broadcast_perm = false;
	}

      if (identity_perm)
	{
	  if (!d->testing_p)
	    emit_move_insn (d->target, d->op0);
	  return true;
	}
      else if (broadcast_perm && TARGET_AVX2)
	{
	  /* Use vpbroadcast{b,w,d}.  */
	  rtx op = d->op0, (*gen) (rtx, rtx) = NULL;
	  switch (d->vmode)
	    {
	    case V32QImode:
	      op = gen_lowpart (V16QImode, op);
	      gen = gen_avx2_pbroadcastv32qi;
	      break;
	    case V16HImode:
	      op = gen_lowpart (V8HImode, op);
	      gen = gen_avx2_pbroadcastv16hi;
	      break;
	    case V8SImode:
	      op = gen_lowpart (V4SImode, op);
	      gen = gen_avx2_pbroadcastv8si;
	      break;
	    case V16QImode:
	      gen = gen_avx2_pbroadcastv16qi;
	      break;
	    case V8HImode:
	      gen = gen_avx2_pbroadcastv8hi;
	      break;
	    /* For other modes prefer other shuffles this function creates.  */
	    default: break;
	    }
	  if (gen != NULL)
	    {
	      if (!d->testing_p)
		emit_insn (gen (d->target, op));
	      return true;
	    }
	}

      if (expand_vselect (d->target, d->op0, perm2, nelt))
	return true;

      /* There are plenty of patterns in sse.md that are written for
	 SEL+CONCAT and are not replicated for a single op.  Perhaps
	 that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
	 every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
	{
	  perm2[i] = d->perm[i] & mask;
	  perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
	}
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
	return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
	{
	  for (i = 0; i < nelt; i += 4)
	    {
	      perm2[i + 0] = d->perm[i + 0] & mask;
	      perm2[i + 1] = d->perm[i + 1] & mask;
	      perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
	      perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
	    }

	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
	    return true;
	}
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (d->op0 != d->op1)
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  perm2[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
	return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
     vpshufb, vpermd or vpermq variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || d->op0 != d->op1)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
	min = e;
      if (e > max)
	max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
				  gen_lowpart (TImode, d->op1),
				  gen_lowpart (TImode, d->op0), shift));

  d->op0 = d->op1 = d->target;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i] - min;
      if (e != i)
	in_order = false;
      d->perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    return true;

  ok = expand_vec_perm_1 (d);
  gcc_assert (ok);

  return ok;
}
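/* For example, with V16QImode operands and perm selecting bytes
   3 .. 18 of the concatenated pair, min = 3, so the palignr shifts the
   pair down by 3 bytes and the residual single-operand permutation
   becomes the identity, the degenerate in-order case above.  */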
static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned HOST_WIDE_INT contents;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx seq;
  bool ok, same_halves = false;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      if (d->op0 == d->op1)
	return false;
    }
  else if (GET_MODE_SIZE (d->vmode) == 32)
    {
      if (!TARGET_AVX)
	return false;
      /* For 32-byte modes allow even d->op0 == d->op1.
	 The lack of cross-lane shuffling in some instructions
	 might prevent a single insn shuffle.  */
      dfinal = *d;
      dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
	 a 3 insn sequence, give up and let it be expanded as
	 3 insn sequence.  While that is one insn longer,
	 it doesn't need a memory operand and in the common
	 case that both interleave low and high permutations
	 with the same operands are adjacent needs 4 insns
	 for both after CSE.  */
      if (expand_vec_perm_interleave3 (&dfinal))
	return false;
    }
  else
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      unsigned HOST_WIDE_INT h1, h2, h3, h4;

      /* Split the two input vectors into 4 halves.  */
      h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
      h2 = h1 << nelt2;
      h3 = h2 << nelt2;
      h4 = h3 << nelt2;

      /* If the elements from the low halves use interleave low, and similarly
	 for interleave high.  If the elements are from mis-matched halves, we
	 can use shufps for V4SF/V4SI or do a DImode shuffle.  */
      if ((contents & (h1 | h3)) == contents)
	{
	  /* punpckl* */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i] = i * 2;
	      remap[i + nelt] = i * 2 + 1;
	      dremap.perm[i * 2] = i;
	      dremap.perm[i * 2 + 1] = i + nelt;
	    }
	  if (!TARGET_SSE2 && d->vmode == V4SImode)
	    dremap.vmode = V4SFmode;
	}
      else if ((contents & (h2 | h4)) == contents)
	{
	  /* punpckh* */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nelt2] = i * 2;
	      remap[i + nelt + nelt2] = i * 2 + 1;
	      dremap.perm[i * 2] = i + nelt2;
	      dremap.perm[i * 2 + 1] = i + nelt + nelt2;
	    }
	  if (!TARGET_SSE2 && d->vmode == V4SImode)
	    dremap.vmode = V4SFmode;
	}
      else if ((contents & (h1 | h4)) == contents)
	{
	  /* shufps */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i] = i;
	      remap[i + nelt + nelt2] = i + nelt2;
	      dremap.perm[i] = i;
	      dremap.perm[i + nelt2] = i + nelt + nelt2;
	    }
	  if (nelt != 4)
	    {
	      /* shufpd */
	      dremap.vmode = V2DImode;
	      dremap.nelt = 2;
	      dremap.perm[0] = 0;
	      dremap.perm[1] = 3;
	    }
	}
      else if ((contents & (h2 | h3)) == contents)
	{
	  /* shufps */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nelt2] = i;
	      remap[i + nelt] = i + nelt2;
	      dremap.perm[i] = i + nelt2;
	      dremap.perm[i + nelt2] = i + nelt;
	    }
	  if (nelt != 4)
	    {
	      /* shufpd */
	      dremap.vmode = V2DImode;
	      dremap.nelt = 2;
	      dremap.perm[0] = 1;
	      dremap.perm[1] = 2;
	    }
	}
      else
	return false;
    }
  else
    {
      unsigned int nelt4 = nelt / 4, nzcnt = 0;
      unsigned HOST_WIDE_INT q[8];
      unsigned int nonzero_halves[4];

      /* Split the two input vectors into 8 quarters.  */
      q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
      for (i = 1; i < 8; ++i)
	q[i] = q[0] << (nelt4 * i);
      for (i = 0; i < 4; ++i)
	if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
	  {
	    nonzero_halves[nzcnt] = i;
	    ++nzcnt;
	  }

      if (nzcnt == 1)
	{
	  gcc_assert (d->op0 == d->op1);
	  nonzero_halves[1] = nonzero_halves[0];
	  same_halves = true;
	}
      else if (d->op0 == d->op1)
	{
	  gcc_assert (nonzero_halves[0] == 0);
	  gcc_assert (nonzero_halves[1] == 1);
	}

      if (nzcnt <= 2)
	{
	  if (d->perm[0] / nelt2 == nonzero_halves[1])
	    {
	      /* Attempt to increase the likelihood that dfinal
		 shuffle will be intra-lane.  */
	      char tmph = nonzero_halves[0];
	      nonzero_halves[0] = nonzero_halves[1];
	      nonzero_halves[1] = tmph;
	    }

	  /* vperm2f128 or vperm2i128.  */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
	      remap[i + nonzero_halves[0] * nelt2] = i;
	      dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
	      dremap.perm[i] = i + nonzero_halves[0] * nelt2;
	    }

	  if (d->vmode != V8SFmode
	      && d->vmode != V4DFmode
	      && d->vmode != V8SImode)
	    {
	      dremap.vmode = V8SImode;
	      dremap.nelt = 8;
	      for (i = 0; i < 4; ++i)
		{
		  dremap.perm[i] = i + nonzero_halves[0] * 4;
		  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
		}
	    }
	}
      else if (d->op0 == d->op1)
	return false;
      else if (TARGET_AVX2
	       && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
	{
	  /* vpunpckl* */
	  for (i = 0; i < nelt4; ++i)
	    {
	      remap[i] = i * 2;
	      remap[i + nelt] = i * 2 + 1;
	      remap[i + nelt2] = i * 2 + nelt2;
	      remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
	      dremap.perm[i * 2] = i;
	      dremap.perm[i * 2 + 1] = i + nelt;
	      dremap.perm[i * 2 + nelt2] = i + nelt2;
	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
	    }
	}
      else if (TARGET_AVX2
	       && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
	{
	  /* vpunpckh* */
	  for (i = 0; i < nelt4; ++i)
	    {
	      remap[i + nelt4] = i * 2;
	      remap[i + nelt + nelt4] = i * 2 + 1;
	      remap[i + nelt2 + nelt4] = i * 2 + nelt2;
	      remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
	      dremap.perm[i * 2] = i + nelt4;
	      dremap.perm[i * 2 + 1] = i + nelt + nelt4;
	      dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
	    }
	}
      else
	return false;
    }

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      /* If same_halves is true, both halves of the remapped vector are the
	 same.  Avoid cross-lane accesses if possible.  */
      if (same_halves && i >= nelt2)
	{
	  gcc_assert (e < nelt2);
	  dfinal.perm[i] = e + nelt2;
	}
      else
	dfinal.perm[i] = e;
    }
  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a single vector cross-lane permutation into vpermq followed
   by any of the single insn permutations.  */

static bool
expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
  unsigned contents[2];
  bool ok;

  if (!(TARGET_AVX2
	&& (d->vmode == V32QImode || d->vmode == V16HImode)
	&& d->op0 == d->op1))
    return false;

  contents[0] = 0;
  contents[1] = 0;
  for (i = 0; i < nelt2; ++i)
    {
      contents[0] |= 1u << (d->perm[i] / nelt4);
      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
    }

  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
	if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
	  return false;
    }

  if (d->testing_p)
    return true;

  dremap = *d;
  dremap.vmode = V4DImode;
  dremap.nelt = 4;
  dremap.target = gen_reg_rtx (V4DImode);
  dremap.op0 = gen_lowpart (V4DImode, d->op0);
  dremap.op1 = dremap.op0;
  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
	if ((contents[i] & (1u << j)) != 0)
	  dremap.perm[2 * i + cnt++] = j;
      for (; cnt < 2; ++cnt)
	dremap.perm[2 * i + cnt] = 0;
    }

  dfinal = *d;
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  for (i = 0, j = 0; i < nelt; ++i)
    {
      if (i == nelt2)
	j = 2;
      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
      if ((d->perm[i] / nelt4) == dremap.perm[j])
	;
      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
	dfinal.perm[i] |= nelt4;
      else
	gcc_unreachable ();
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  ok = expand_vec_perm_1 (&dfinal);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation using 2 intra-lane interleave insns
   and cross-lane shuffle for 32-byte vectors.  */

static bool
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (d->op0 == d->op1)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
    ;
  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
    ;
  else
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
	|| d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V32QImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv32qi;
      else
	gen = gen_vec_interleave_lowv32qi;
      break;
    case V16HImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv16hi;
      else
	gen = gen_vec_interleave_lowv16hi;
      break;
    case V8SImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8si;
      else
	gen = gen_vec_interleave_lowv8si;
      break;
    case V4DImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4di;
      else
	gen = gen_vec_interleave_lowv4di;
      break;
    case V8SFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8sf;
      else
	gen = gen_vec_interleave_lowv8sf;
      break;
    case V4DFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4df;
      else
	gen = gen_vec_interleave_lowv4df;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (d->target, d->op0, d->op1));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
   a single vector permutation using a single intra-lane vector
   permutation, vperm2f128 swapping the lanes and vblend* insn blending
   the non-swapped and swapped vectors together.  */

static bool
expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
  rtx seq;
  bool ok;
  rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;

  if (!TARGET_AVX
      || (d->vmode != V8SFmode && d->vmode != V4DFmode)
      || d->op0 != d->op1)
    return false;

  dfirst = *d;
  for (i = 0; i < nelt; i++)
    dfirst.perm[i] = 0xff;
  for (i = 0, msk = 0; i < nelt; i++)
    {
      j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
      if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
	return false;
      dfirst.perm[j] = d->perm[i];
      if (j != i)
	msk |= (1 << i);
    }
  for (i = 0; i < nelt; i++)
    if (dfirst.perm[i] == 0xff)
      dfirst.perm[i] = i;

  if (!d->testing_p)
    dfirst.target = gen_reg_rtx (dfirst.vmode);

  start_sequence ();
  ok = expand_vec_perm_1 (&dfirst);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  emit_insn (seq);

  dsecond = *d;
  dsecond.op0 = dfirst.target;
  dsecond.op1 = dfirst.target;
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  for (i = 0; i < nelt; i++)
    dsecond.perm[i] = i ^ nelt2;

  ok = expand_vec_perm_1 (&dsecond);
  gcc_assert (ok);

  blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
  emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (d->op0 != d->op1);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
	e -= nelt;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
	  rperm[1-which][i*eltsz + j] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = gen_lowpart (V16QImode, d->target);
  emit_insn (gen_iorv16qi3 (op, l, h));

  return true;
}
/* Implement arbitrary permutation of one V32QImode and V16QImode operand
   with two vpshufb insns, vpermq and vpor.  We should have already failed
   all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, hp, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->op0 != d->op1
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
	  rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  /* Swap the 128-bit lanes of h into hp.  */
  hp = gen_reg_rtx (V4DImode);
  op = gen_lowpart (V4DImode, h);
  emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
				  const1_rtx));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V32QImode or V16HImode operands
   with two vpshufb insns, vpor and vpermq.  We should have already
   failed all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, ior, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->op0 == d->op1
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  for (i = 0; i < d->nelt; ++i)
    if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
      return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  In the first permutation mask
     the first quarter will contain indexes for the first half
     of the op0, the second quarter will contain bit 7 set, the third
     quarter will contain indexes for the second half of the op0 and
     the last quarter bit 7 set.  In the second permutation mask
     the first quarter will contain bit 7 set, the second quarter
     indexes for the first half of the op1, the third quarter bit 7 set
     and the last quarter indexes for the second half of the op1.
     I.e. the first mask e.g. for V32QImode extract even will be:
     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
     (all values masked with 0xf except for -128) and the second mask
     for extract even will be
     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = d->perm[i] >= nelt;
      unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
          rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op1);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  ior = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (ior, l, h));

  /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
  op = gen_lowpart (V4DImode, d->target);
  ior = gen_lowpart (V4DImode, ior);
  emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
                                  const1_rtx, GEN_INT (3)));

  return true;
}
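
/* Every byte position is -128 in exactly one of the two masks, so the
   two vpshufb results are disjoint and a plain vpor merges them; the
   final vpermq only has to reorder whole 64-bit quarters.  */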
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3;

  switch (d->vmode)
    {
    case V4DFmode:
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
        t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
        t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SFmode:
      {
        int mask = odd ? 0xdd : 0x88;

        t1 = gen_reg_rtx (V8SFmode);
        t2 = gen_reg_rtx (V8SFmode);
        t3 = gen_reg_rtx (V8SFmode);

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
        emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
                                      GEN_INT (mask)));

        /* Shuffle the lanes around to produce:
           { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
                                            GEN_INT (0x3)));

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
        emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

        /* Shuffle within the 128-bit lanes to produce:
           { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
        emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

        /* Shuffle the lanes around to produce:
           { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
                                            GEN_INT (0x20)));
      }
      break;

    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          /* We need 2*log2(N)-1 operations to achieve odd/even
             with interleave.  */
          t1 = gen_reg_rtx (V8HImode);
          t2 = gen_reg_rtx (V8HImode);
          emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
          if (odd)
            t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
          else
            t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
          emit_insn (t3);
        }
      break;

    case V16QImode:
      if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          t1 = gen_reg_rtx (V16QImode);
          t2 = gen_reg_rtx (V16QImode);
          t3 = gen_reg_rtx (V16QImode);
          emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
          emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
          if (odd)
            t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
          else
            t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
          emit_insn (t3);
        }
      break;

    case V16HImode:
    case V32QImode:
      return expand_vec_perm_vpshufb2_vpermq_even_odd (d);

    case V4DImode:
      if (!TARGET_AVX2)
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V4DFmode;
          d_copy.target = gen_lowpart (V4DFmode, d->target);
          d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
          d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
          return expand_vec_perm_even_odd_1 (&d_copy, odd);
        }

      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now a vpunpck[lh]qdq will produce the result required.  */
      if (odd)
        t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
      else
        t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SImode:
      if (!TARGET_AVX2)
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V8SFmode;
          d_copy.target = gen_lowpart (V8SFmode, d->target);
          d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
          d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
          return expand_vec_perm_even_odd_1 (&d_copy, odd);
        }

      t1 = gen_reg_rtx (V8SImode);
      t2 = gen_reg_rtx (V8SImode);

      /* Shuffle the lanes around into
         { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
                                    gen_lowpart (V4DImode, d->op0),
                                    gen_lowpart (V4DImode, d->op1),
                                    GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
                                    gen_lowpart (V4DImode, d->op0),
                                    gen_lowpart (V4DImode, d->op1),
                                    GEN_INT (0x31)));

      /* Swap the 2nd and 3rd position in each lane into
         { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
      emit_insn (gen_avx2_pshufdv3 (t1, t1,
                                    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
      emit_insn (gen_avx2_pshufdv3 (t2, t2,
                                    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));

      /* Now a vpunpck[lh]qdq will produce
         { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
      if (odd)
        t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
                                           gen_lowpart (V4DImode, t1),
                                           gen_lowpart (V4DImode, t2));
      else
        t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
                                          gen_lowpart (V4DImode, t1),
                                          gen_lowpart (V4DImode, t2));
      emit_insn (t3);
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
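
/* In the V8SFmode case the shufps immediate selects two elements from
   each source per 128-bit lane, two bits per element: 0x88 picks
   elements { 0, 2 } of both inputs (extract even) and 0xdd picks
   { 1, 3 } (extract odd).  */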
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
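
/* For example, a V4SImode extract-odd matches here when the mask is
   { 1, 3, 5, 7 }: d->perm[0] fixes odd = 1 and each later element
   equals 2 * i + odd.  */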
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
         use the vbroadcast instruction.  They expand to two insns
         if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
         stopping once we have promoted to V4SImode and then use pshufd.  */
      do
        {
          rtx dest;
          rtx (*gen) (rtx, rtx, rtx)
            = vmode == V16QImode ? gen_vec_interleave_lowv16qi
                                 : gen_vec_interleave_lowv8hi;

          if (elt >= nelt2)
            {
              gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
                                       : gen_vec_interleave_highv8hi;
              elt -= nelt2;
            }
          nelt2 /= 2;

          dest = gen_reg_rtx (vmode);
          emit_insn (gen (dest, op0, op0));
          vmode = get_mode_wider_vector (vmode);
          op0 = gen_lowpart (vmode, dest);
        }
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
      gcc_assert (ok);
      return true;

    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      /* For AVX2 broadcasts of the first element vpbroadcast* or
         vpermq should be used by expand_vec_perm_1.  */
      gcc_assert (!TARGET_AVX2 || d->perm[0]);
      return false;

    default:
      gcc_unreachable ();
    }
}
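
/* Interleaving OP0 with itself doubles the width of the broadcast
   element each step, promoting V16QImode to V8HImode and then to
   V4SImode, at which point a single pshufd with an all-equal selector
   replicates the 32-bit word across the vector.  */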
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (d->op0 != d->op1)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* Implement arbitrary permutation of two V32QImode or V16HImode operands
   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
   all the shorter instruction sequences.  */

static bool
expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
{
  rtx rperm[4][32], vperm, l[2], h[2], op, m128;
  unsigned int i, nelt, eltsz;
  bool used[4];

  if (!TARGET_AVX2
      || d->op0 == d->op1
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     The other mask has non-negative elements where an element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < 32; ++i)
    {
      rperm[0][i] = m128;
      rperm[1][i] = m128;
      rperm[2][i] = m128;
      rperm[3][i] = m128;
    }
  used[0] = false;
  used[1] = false;
  used[2] = false;
  used[3] = false;
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
      unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);

      for (j = 0; j < eltsz; ++j)
        rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
      used[which] = true;
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i + 1])
        {
          h[i] = NULL_RTX;
          continue;
        }
      vperm = gen_rtx_CONST_VECTOR (V32QImode,
                                    gen_rtvec_v (32, rperm[2 * i + 1]));
      vperm = force_reg (V32QImode, vperm);
      h[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
    }

  /* Swap the 128-bit lanes of h[X].  */
  for (i = 0; i < 2; ++i)
    {
      if (h[i] == NULL_RTX)
        continue;
      op = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
                                      const2_rtx, GEN_INT (3), const0_rtx,
                                      const1_rtx));
      h[i] = gen_lowpart (V32QImode, op);
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i])
        {
          l[i] = NULL_RTX;
          continue;
        }
      vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
      vperm = force_reg (V32QImode, vperm);
      l[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
    }

  for (i = 0; i < 2; ++i)
    {
      if (h[i] && l[i])
        {
          op = gen_reg_rtx (V32QImode);
          emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
          l[i] = op;
        }
      else if (h[i])
        l[i] = h[i];
    }

  gcc_assert (l[0] && l[1]);
  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
  return true;
}
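
/* The four masks are indexed by (operand, lane crossing): rperm[0] and
   rperm[1] select op0 bytes that stay in or cross their 128-bit lane,
   rperm[2] and rperm[3] do the same for op1.  Masks never referenced
   by the permutation are flagged unused, so fewer than four vpshufb
   insns may actually be emitted.  */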
/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  if (expand_vec_perm_vpermq_perm_1 (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_pshufb2 (d))
    return true;

  if (expand_vec_perm_interleave3 (d))
    return true;

  if (expand_vec_perm_vperm2f128_vblend (d))
    return true;

  /* Try sequences of four instructions.  */

  if (expand_vec_perm_vpshufb2_vpermq (d))
    return true;

  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly... */

  if (expand_vec_perm_even_odd (d))
    return true;

  /* Even longer sequences.  */
  if (expand_vec_perm_vpshufb4_vpermq2 (d))
    return true;

  return false;
}
bool
ix86_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      for (i = 0; i < nelt; ++i)
        if (d.perm[i] >= nelt)
          d.perm[i] -= nelt;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] -= nelt;
      d.op0 = d.op1;
      break;

    case 1:
      d.op1 = d.op0;
      break;
    }

  if (ix86_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with d.op0 == d.op1.  If that didn't work,
     retry with d.op0 != d.op1 as that is what testing has been done with.  */
  if (which == 3 && d.op0 == d.op1)
    {
      rtx seq;
      bool ok;

      memcpy (d.perm, perm, sizeof (perm));
      d.op1 = gen_reg_rtx (d.vmode);
      start_sequence ();
      ok = ix86_expand_vec_perm_const_1 (&d);
      seq = get_insns ();
      end_sequence ();
      if (ok)
        {
          emit_move_insn (d.op1, d.op0);
          emit_insn (seq);
          return true;
        }
    }

  return false;
}
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret, one_vec;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
        return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
        return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (d.nelt == 2)
        return true;
    }

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  one_vec = (which != 3);

  /* Implementable with shufps or pshufd.  */
  if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!one_vec)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  if (GET_CODE (dst) == SUBREG)
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  if (GET_CODE (src) == SUBREG)
    src = SUBREG_REG (src);

  switch (GET_MODE (dst))
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      {
        enum machine_mode srcmode, dstmode;
        rtx (*pinsr)(rtx, rtx, rtx, rtx);

        srcmode = mode_for_size (size, MODE_INT, 0);

        switch (srcmode)
          {
          case QImode:
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V16QImode;
            pinsr = gen_sse4_1_pinsrb;
            break;

          case HImode:
            if (!TARGET_SSE2)
              return false;
            dstmode = V8HImode;
            pinsr = gen_sse2_pinsrw;
            break;

          case SImode:
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V4SImode;
            pinsr = gen_sse4_1_pinsrd;
            break;

          case DImode:
            gcc_assert (TARGET_64BIT);
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V2DImode;
            pinsr = gen_sse4_1_pinsrq;
            break;

          default:
            return false;
          }

        dst = gen_lowpart (dstmode, dst);
        src = gen_lowpart (srcmode, src);

        pos /= size;

        emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
        return true;
      }

    default:
      return false;
    }
}
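
/* For example, inserting a 16-bit value at bit position 32 of a
   V8HImode destination yields pos = 32 / 16 = 2, so the pinsrw
   selector immediate is GEN_INT (1 << 2).  */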
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT && va_list_type_node != NULL_TREE)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return ms_va_list_type_node;
      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (!TARGET_64BIT)
    return 0;

  switch (idx)
    {
    default:
      break;

    case 0:
      *ptree = ms_va_list_type_node;
      *pname = "__builtin_ms_va_list";
      return 1;

    case 1:
      *ptree = sysv_va_list_type_node;
      *pname = "__builtin_sysv_va_list";
      return 1;
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width

/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1

#undef BIG
#define BIG 100
/* Dispatch groups.  Instructions that affect the mix in a dispatch
   window.  */
enum dispatch_group
{
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};

/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of this kind of operation does not have any
   effect in a dispatch window, but we need them for other reasons in
   a window.  */
static unsigned int num_allowable_groups[disp_last] = {
  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
};

static char group_name[disp_last + 1][16] = {
  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
};

/* Instruction path.  */
enum insn_path
{
  no_path = 0,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi,  /* Instructions with more than 2 micro ops.  */
  last_path
};

/* sched_insn_info defines a window to the instructions scheduled in
   the basic block.  It contains a pointer to the insn_info table and
   the instruction scheduled.

   Windows are allocated for each basic block and are linked
   together.  */
typedef struct sched_insn_info_s
{
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;

/* Linked list of dispatch windows.  This is a two way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
typedef struct dispatch_windows_s
{
  int num_insn;            /* Number of insn in the window.  */
  int num_uops;            /* Number of uops in the window.  */
  int window_size;         /* Number of bytes in the window.  */
  int window_num;          /* Window number, 0 or 1.  */
  int num_imm;             /* Number of immediates in an insn.  */
  int num_imm_32;          /* Number of 32 bit immediates in an insn.  */
  int num_imm_64;          /* Number of 64 bit immediates in an insn.  */
  int imm_size;            /* Total immediates in the window.  */
  int num_loads;           /* Total memory loads in the window.  */
  int num_stores;          /* Total memory stores in the window.  */
  int violation;           /* Violation exists in window.  */
  sched_insn_info *window; /* Pointer to the window.  */
  struct dispatch_windows_s *next;
  struct dispatch_windows_s *prev;
} dispatch_windows;

/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;

static dispatch_windows *dispatch_window_list;
static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}

/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
          || type == TYPE_ICMP
          || type == TYPE_FCMP
          || GET_CODE (PATTERN (insn)) == COMPARE);
}

/* Return true if a dispatch violation was encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}

/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}
/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}

/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}

/* This function is called when the end of a window's processing is
   reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
                  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}
/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not dispatch
   windows whose sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
        init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch ( GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
        (imm_values->imm32)++;
      else
        (imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
        {
          (imm_values->imm)++;
          (imm_values->imm32)++;
        }
      break;

    default:
      break;
    }

  return 0;
}
38127 find_constant (rtx in_rtx
, imm_info
*imm_values
)
38129 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
38130 (rtx_function
) find_constant_1
, (void *) imm_values
);
/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
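
/* For example, an insn with one 32 bit and one 64 bit immediate sets
   *imm to 2 and returns 1 * 4 + 1 * 8 == 12 bytes of immediate data.  */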
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                               &num_imm64_operand);
  return false;
}
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int)path == 0)
    return path_single;

  if ((int)path == 1)
    return path_double;

  return path_multi;
}
/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
          || num_imm_operand + window_list->num_imm > MAX_IMM
          || (num_imm32_operand > 0
              && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
                  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
          || (num_imm64_operand > 0
              && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
                  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
          || (window_list->imm_size + imm_size == MAX_IMM_SIZE
              && num_imm64_operand > 0
              && ((window_list->num_imm_64 > 0
                   && window_list->num_insn >= 2)
                  || window_list->num_insn >= 3)))
        return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
           || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
           || group == disp_prefetch)
          && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
          && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
          || (min_insn_size (insn) + sum) >= 48)
        /* Window 1 is full.  Go for next window.  */
        return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single- and double-path
         uops.  */
      if (path == path_double
          && (window_list->num_uops + 2) > MAX_INSN)
        return false;
      else if (path != path_single)
        return false;
    }
  return true;
}
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
           || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
          && insn_group == disp_branch)
        {
          process_end_window ();
          return;
        }
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
          || (byte_len + sum) >= 48)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block process.  */
      process_end_window ();
      return;
    }
}
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
           list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
           list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
        break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
               i, group_name[list->window[i].group],
               i, (void *)list->window[i].insn,
               i, list->window[i].path,
               i, list->window[i].byte_len,
               i, list->window[i].imm_bytes);
    }
}
/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
           group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;

      case IS_DISPATCH_ON:
        return true;

      case IS_CMP:
        return is_cmp (insn);

      case DISPATCH_VIOLATION:
        return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.

   Currently parallel reassociation is enabled for Atom
   processors only and we set reassociation width to be 2
   because Atom may issue up to 2 instructions per cycle.

   Return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
                          enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
        return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
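
/* The result is a bit mask of vector byte sizes for the vectorizer to
   try, so 32 | 16 requests 32-byte (256-bit) vectors with a 16-byte
   fallback, while 0 means only the preferred SIMD mode is used.  */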
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#if TARGET_HAVE_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"