/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
42 #include "diagnostic-core.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
53 #include "tm-constrs.h"
57 #include "dwarf2out.h"
58 #include "sched-int.h"
/* Dataflow lattice for the upper 128 bits of the AVX registers.  */
enum upper_128bits_state
{
  unknown = 0,		/* Unknown.  */
  unused,		/* Not used or not referenced.  */
  used			/* Used or referenced.  */
};
67 typedef struct block_info_def
69 /* State of the upper 128bits of any AVX registers at exit. */
70 enum upper_128bits_state state
;
71 /* If the upper 128bits of any AVX registers are referenced. */
72 enum upper_128bits_state referenced
;
73 /* Number of vzerouppers in this block. */
75 /* TRUE if block has been processed. */
77 /* TRUE if block has been rescanned. */
81 #define BLOCK_INFO(B) ((block_info) (B)->aux)
/* Classification of a call site with respect to 256bit AVX registers;
   stored as the operand of the vzeroupper UNSPEC.  NOTE(review): the
   extraction dropped three enumerators; names restored from upstream
   GCC -- verify.  */
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
98 /* Check if a 256bit AVX register is referenced in stores. */
101 check_avx256_stores (rtx dest
, const_rtx set
, void *data
)
104 && VALID_AVX256_REG_MODE (GET_MODE (dest
)))
105 || (GET_CODE (set
) == SET
106 && REG_P (SET_SRC (set
))
107 && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set
)))))
109 enum upper_128bits_state
*state
110 = (enum upper_128bits_state
*) data
;
115 /* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
116 in basic block BB. Delete it if upper 128bit AVX registers are
117 unused. If it isn't deleted, move it to just before a jump insn.
119 UPPER_128BITS_LIVE is TRUE if the upper 128bits of any AVX registers
120 are live at entry. */
123 move_or_delete_vzeroupper_2 (basic_block bb
,
124 enum upper_128bits_state state
)
127 rtx vzeroupper_insn
= NULL_RTX
;
130 enum upper_128bits_state referenced
= BLOCK_INFO (bb
)->referenced
;
131 int count
= BLOCK_INFO (bb
)->count
;
134 fprintf (dump_file
, " [bb %i] entry: upper 128bits: %d\n",
137 /* BB_END changes when it is deleted. */
138 bb_end
= BB_END (bb
);
140 while (insn
!= bb_end
)
142 insn
= NEXT_INSN (insn
);
144 if (!NONDEBUG_INSN_P (insn
))
147 /* Move vzeroupper before jump/call. */
148 if (JUMP_P (insn
) || CALL_P (insn
))
150 if (!vzeroupper_insn
)
153 if (PREV_INSN (insn
) != vzeroupper_insn
)
157 fprintf (dump_file
, "Move vzeroupper after:\n");
158 print_rtl_single (dump_file
, PREV_INSN (insn
));
159 fprintf (dump_file
, "before:\n");
160 print_rtl_single (dump_file
, insn
);
162 reorder_insns_nobb (vzeroupper_insn
, vzeroupper_insn
,
165 vzeroupper_insn
= NULL_RTX
;
169 pat
= PATTERN (insn
);
171 /* Check insn for vzeroupper intrinsic. */
172 if (GET_CODE (pat
) == UNSPEC_VOLATILE
173 && XINT (pat
, 1) == UNSPECV_VZEROUPPER
)
177 /* Found vzeroupper intrinsic. */
178 fprintf (dump_file
, "Found vzeroupper:\n");
179 print_rtl_single (dump_file
, insn
);
184 /* Check insn for vzeroall intrinsic. */
185 if (GET_CODE (pat
) == PARALLEL
186 && GET_CODE (XVECEXP (pat
, 0, 0)) == UNSPEC_VOLATILE
187 && XINT (XVECEXP (pat
, 0, 0), 1) == UNSPECV_VZEROALL
)
191 /* Delete pending vzeroupper insertion. */
195 delete_insn (vzeroupper_insn
);
196 vzeroupper_insn
= NULL_RTX
;
199 else if (state
!= used
&& referenced
!= unused
)
201 /* No need to call note_stores if the upper 128bits of
202 AVX registers are never referenced. */
203 note_stores (pat
, check_avx256_stores
, &state
);
210 /* Process vzeroupper intrinsic. */
212 avx256
= INTVAL (XVECEXP (pat
, 0, 0));
216 /* Since the upper 128bits are cleared, callee must not pass
217 256bit AVX register. We only need to check if callee
218 returns 256bit AVX register. */
219 if (avx256
== callee_return_avx256
)
222 /* Remove unnecessary vzeroupper since upper 128bits are
226 fprintf (dump_file
, "Delete redundant vzeroupper:\n");
227 print_rtl_single (dump_file
, insn
);
234 /* Set state to UNUSED if callee doesn't return 256bit AVX
236 if (avx256
!= callee_return_pass_avx256
)
239 if (avx256
== callee_return_pass_avx256
240 || avx256
== callee_pass_avx256
)
242 /* Must remove vzeroupper since callee passes in 256bit
246 fprintf (dump_file
, "Delete callee pass vzeroupper:\n");
247 print_rtl_single (dump_file
, insn
);
253 vzeroupper_insn
= insn
;
257 BLOCK_INFO (bb
)->state
= state
;
259 if (BLOCK_INFO (bb
)->referenced
== unknown
)
261 /* The upper 128bits of AVX registers are never referenced if
262 REFERENCED isn't updated. */
263 if (referenced
== unknown
)
265 BLOCK_INFO (bb
)->referenced
= referenced
;
266 BLOCK_INFO (bb
)->count
= count
;
270 fprintf (dump_file
, " [bb %i] exit: upper 128bits: %d\n",
274 /* Helper function for move_or_delete_vzeroupper. Process vzeroupper
275 in BLOCK and its predecessor blocks recursively. */
278 move_or_delete_vzeroupper_1 (basic_block block
)
282 enum upper_128bits_state state
;
285 fprintf (dump_file
, " Process [bb %i]: status: %d\n",
286 block
->index
, BLOCK_INFO (block
)->processed
);
288 if (BLOCK_INFO (block
)->processed
)
291 BLOCK_INFO (block
)->processed
= true;
295 /* Process all predecessor edges of this block. */
296 FOR_EACH_EDGE (e
, ei
, block
->preds
)
300 move_or_delete_vzeroupper_1 (e
->src
);
301 switch (BLOCK_INFO (e
->src
)->state
)
315 /* If state of any predecessor edges is unknown, we need to rescan. */
316 if (state
== unknown
)
317 cfun
->machine
->rescan_vzeroupper_p
= 1;
319 /* Process this block. */
320 move_or_delete_vzeroupper_2 (block
, state
);
323 /* Helper function for move_or_delete_vzeroupper. Rescan vzeroupper
324 in BLOCK and its predecessor blocks recursively. */
327 rescan_move_or_delete_vzeroupper (basic_block block
)
331 enum upper_128bits_state state
;
334 fprintf (dump_file
, " Rescan [bb %i]: status: %d\n",
335 block
->index
, BLOCK_INFO (block
)->rescanned
);
337 if (BLOCK_INFO (block
)->rescanned
)
340 BLOCK_INFO (block
)->rescanned
= true;
344 /* Rescan all predecessor edges of this block. */
345 FOR_EACH_EDGE (e
, ei
, block
->preds
)
349 rescan_move_or_delete_vzeroupper (e
->src
);
350 /* For rescan, UKKNOWN state is treated as UNUSED. */
351 if (BLOCK_INFO (e
->src
)->state
== used
)
355 /* Rescan this block only if there are vzerouppers or the upper
356 128bits of AVX registers are referenced. */
357 if (BLOCK_INFO (block
)->count
== 0
358 && (state
== used
|| BLOCK_INFO (block
)->referenced
!= used
))
361 BLOCK_INFO (block
)->state
= state
;
363 fprintf (dump_file
, " [bb %i] exit: upper 128bits: %d\n",
364 block
->index
, BLOCK_INFO (block
)->state
);
367 move_or_delete_vzeroupper_2 (block
, state
);
370 /* Go through the instruction stream looking for vzeroupper. Delete
371 it if upper 128bit AVX registers are unused. If it isn't deleted,
372 move it to just before a jump insn. */
375 move_or_delete_vzeroupper (void)
380 unsigned int count
= 0;
382 /* Set up block info for each basic block. */
383 alloc_aux_for_blocks (sizeof (struct block_info_def
));
385 /* Process successor blocks of all entry points. */
387 fprintf (dump_file
, "Process all entry points\n");
389 FOR_EACH_EDGE (e
, ei
, ENTRY_BLOCK_PTR
->succs
)
391 move_or_delete_vzeroupper_2 (e
->dest
,
392 cfun
->machine
->caller_pass_avx256_p
394 BLOCK_INFO (e
->dest
)->processed
= true;
395 BLOCK_INFO (e
->dest
)->rescanned
= true;
398 /* Process all basic blocks. */
400 fprintf (dump_file
, "Process all basic blocks\n");
404 move_or_delete_vzeroupper_1 (bb
);
405 count
+= BLOCK_INFO (bb
)->count
;
408 /* Rescan all basic blocks if needed. */
409 if (count
&& cfun
->machine
->rescan_vzeroupper_p
)
412 fprintf (dump_file
, "Rescan all basic blocks\n");
415 rescan_move_or_delete_vzeroupper (bb
);
418 free_aux_for_blocks ();
421 static rtx
legitimize_dllimport_symbol (rtx
, bool);
/* Default stack-checking limit when the target headers don't provide
   one; -1 means no constant limit.  (The closing #endif was dropped by
   the extraction and is restored here.)  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables:
   0 = QImode, 1 = HImode, 2 = SImode, 3 = DImode, 4 = everything else.
   NOTE(review): the final ": 4)" arm was dropped by the extraction and
   is restored per upstream GCC.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop strategy: always fall back to a library call.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
442 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
443 COSTS_N_BYTES (2), /* cost of an add instruction */
444 COSTS_N_BYTES (3), /* cost of a lea instruction */
445 COSTS_N_BYTES (2), /* variable shift costs */
446 COSTS_N_BYTES (3), /* constant shift costs */
447 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
448 COSTS_N_BYTES (3), /* HI */
449 COSTS_N_BYTES (3), /* SI */
450 COSTS_N_BYTES (3), /* DI */
451 COSTS_N_BYTES (5)}, /* other */
452 0, /* cost of multiply per each bit set */
453 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
454 COSTS_N_BYTES (3), /* HI */
455 COSTS_N_BYTES (3), /* SI */
456 COSTS_N_BYTES (3), /* DI */
457 COSTS_N_BYTES (5)}, /* other */
458 COSTS_N_BYTES (3), /* cost of movsx */
459 COSTS_N_BYTES (3), /* cost of movzx */
460 0, /* "large" insn */
462 2, /* cost for loading QImode using movzbl */
463 {2, 2, 2}, /* cost of loading integer registers
464 in QImode, HImode and SImode.
465 Relative to reg-reg move (2). */
466 {2, 2, 2}, /* cost of storing integer registers */
467 2, /* cost of reg,reg fld/fst */
468 {2, 2, 2}, /* cost of loading fp registers
469 in SFmode, DFmode and XFmode */
470 {2, 2, 2}, /* cost of storing fp registers
471 in SFmode, DFmode and XFmode */
472 3, /* cost of moving MMX register */
473 {3, 3}, /* cost of loading MMX registers
474 in SImode and DImode */
475 {3, 3}, /* cost of storing MMX registers
476 in SImode and DImode */
477 3, /* cost of moving SSE register */
478 {3, 3, 3}, /* cost of loading SSE registers
479 in SImode, DImode and TImode */
480 {3, 3, 3}, /* cost of storing SSE registers
481 in SImode, DImode and TImode */
482 3, /* MMX or SSE register to integer */
483 0, /* size of l1 cache */
484 0, /* size of l2 cache */
485 0, /* size of prefetch block */
486 0, /* number of parallel prefetches */
488 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
489 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
490 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
491 COSTS_N_BYTES (2), /* cost of FABS instruction. */
492 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
493 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
494 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
495 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
496 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
497 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
498 1, /* scalar_stmt_cost. */
499 1, /* scalar load_cost. */
500 1, /* scalar_store_cost. */
501 1, /* vec_stmt_cost. */
502 1, /* vec_to_scalar_cost. */
503 1, /* scalar_to_vec_cost. */
504 1, /* vec_align_load_cost. */
505 1, /* vec_unalign_load_cost. */
506 1, /* vec_store_cost. */
507 1, /* cond_taken_branch_cost. */
508 1, /* cond_not_taken_branch_cost. */
511 /* Processor costs (relative to an add) */
513 struct processor_costs i386_cost
= { /* 386 specific costs */
514 COSTS_N_INSNS (1), /* cost of an add instruction */
515 COSTS_N_INSNS (1), /* cost of a lea instruction */
516 COSTS_N_INSNS (3), /* variable shift costs */
517 COSTS_N_INSNS (2), /* constant shift costs */
518 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
519 COSTS_N_INSNS (6), /* HI */
520 COSTS_N_INSNS (6), /* SI */
521 COSTS_N_INSNS (6), /* DI */
522 COSTS_N_INSNS (6)}, /* other */
523 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
524 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
525 COSTS_N_INSNS (23), /* HI */
526 COSTS_N_INSNS (23), /* SI */
527 COSTS_N_INSNS (23), /* DI */
528 COSTS_N_INSNS (23)}, /* other */
529 COSTS_N_INSNS (3), /* cost of movsx */
530 COSTS_N_INSNS (2), /* cost of movzx */
531 15, /* "large" insn */
533 4, /* cost for loading QImode using movzbl */
534 {2, 4, 2}, /* cost of loading integer registers
535 in QImode, HImode and SImode.
536 Relative to reg-reg move (2). */
537 {2, 4, 2}, /* cost of storing integer registers */
538 2, /* cost of reg,reg fld/fst */
539 {8, 8, 8}, /* cost of loading fp registers
540 in SFmode, DFmode and XFmode */
541 {8, 8, 8}, /* cost of storing fp registers
542 in SFmode, DFmode and XFmode */
543 2, /* cost of moving MMX register */
544 {4, 8}, /* cost of loading MMX registers
545 in SImode and DImode */
546 {4, 8}, /* cost of storing MMX registers
547 in SImode and DImode */
548 2, /* cost of moving SSE register */
549 {4, 8, 16}, /* cost of loading SSE registers
550 in SImode, DImode and TImode */
551 {4, 8, 16}, /* cost of storing SSE registers
552 in SImode, DImode and TImode */
553 3, /* MMX or SSE register to integer */
554 0, /* size of l1 cache */
555 0, /* size of l2 cache */
556 0, /* size of prefetch block */
557 0, /* number of parallel prefetches */
559 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (22), /* cost of FABS instruction. */
563 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
565 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
566 DUMMY_STRINGOP_ALGS
},
567 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
568 DUMMY_STRINGOP_ALGS
},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs i486_cost
= { /* 486 specific costs */
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (1), /* cost of a lea instruction */
586 COSTS_N_INSNS (3), /* variable shift costs */
587 COSTS_N_INSNS (2), /* constant shift costs */
588 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (12), /* HI */
590 COSTS_N_INSNS (12), /* SI */
591 COSTS_N_INSNS (12), /* DI */
592 COSTS_N_INSNS (12)}, /* other */
593 1, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (40), /* HI */
596 COSTS_N_INSNS (40), /* SI */
597 COSTS_N_INSNS (40), /* DI */
598 COSTS_N_INSNS (40)}, /* other */
599 COSTS_N_INSNS (3), /* cost of movsx */
600 COSTS_N_INSNS (2), /* cost of movzx */
601 15, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {2, 4, 2}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {2, 4, 2}, /* cost of storing integer registers */
608 2, /* cost of reg,reg fld/fst */
609 {8, 8, 8}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {8, 8, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 8}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 8}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 8, 16}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 8, 16}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 3, /* MMX or SSE register to integer */
624 4, /* size of l1 cache. 486 has 8kB cache
625 shared for code and data, so 4kB is
626 not really precise. */
627 4, /* size of l2 cache */
628 0, /* size of prefetch block */
629 0, /* number of parallel prefetches */
631 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
632 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
633 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
634 COSTS_N_INSNS (3), /* cost of FABS instruction. */
635 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
636 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
637 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
638 DUMMY_STRINGOP_ALGS
},
639 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
640 DUMMY_STRINGOP_ALGS
},
641 1, /* scalar_stmt_cost. */
642 1, /* scalar load_cost. */
643 1, /* scalar_store_cost. */
644 1, /* vec_stmt_cost. */
645 1, /* vec_to_scalar_cost. */
646 1, /* scalar_to_vec_cost. */
647 1, /* vec_align_load_cost. */
648 2, /* vec_unalign_load_cost. */
649 1, /* vec_store_cost. */
650 3, /* cond_taken_branch_cost. */
651 1, /* cond_not_taken_branch_cost. */
655 struct processor_costs pentium_cost
= {
656 COSTS_N_INSNS (1), /* cost of an add instruction */
657 COSTS_N_INSNS (1), /* cost of a lea instruction */
658 COSTS_N_INSNS (4), /* variable shift costs */
659 COSTS_N_INSNS (1), /* constant shift costs */
660 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
661 COSTS_N_INSNS (11), /* HI */
662 COSTS_N_INSNS (11), /* SI */
663 COSTS_N_INSNS (11), /* DI */
664 COSTS_N_INSNS (11)}, /* other */
665 0, /* cost of multiply per each bit set */
666 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
667 COSTS_N_INSNS (25), /* HI */
668 COSTS_N_INSNS (25), /* SI */
669 COSTS_N_INSNS (25), /* DI */
670 COSTS_N_INSNS (25)}, /* other */
671 COSTS_N_INSNS (3), /* cost of movsx */
672 COSTS_N_INSNS (2), /* cost of movzx */
673 8, /* "large" insn */
675 6, /* cost for loading QImode using movzbl */
676 {2, 4, 2}, /* cost of loading integer registers
677 in QImode, HImode and SImode.
678 Relative to reg-reg move (2). */
679 {2, 4, 2}, /* cost of storing integer registers */
680 2, /* cost of reg,reg fld/fst */
681 {2, 2, 6}, /* cost of loading fp registers
682 in SFmode, DFmode and XFmode */
683 {4, 4, 6}, /* cost of storing fp registers
684 in SFmode, DFmode and XFmode */
685 8, /* cost of moving MMX register */
686 {8, 8}, /* cost of loading MMX registers
687 in SImode and DImode */
688 {8, 8}, /* cost of storing MMX registers
689 in SImode and DImode */
690 2, /* cost of moving SSE register */
691 {4, 8, 16}, /* cost of loading SSE registers
692 in SImode, DImode and TImode */
693 {4, 8, 16}, /* cost of storing SSE registers
694 in SImode, DImode and TImode */
695 3, /* MMX or SSE register to integer */
696 8, /* size of l1 cache. */
697 8, /* size of l2 cache */
698 0, /* size of prefetch block */
699 0, /* number of parallel prefetches */
701 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (1), /* cost of FABS instruction. */
705 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
707 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
708 DUMMY_STRINGOP_ALGS
},
709 {{libcall
, {{-1, rep_prefix_4_byte
}}},
710 DUMMY_STRINGOP_ALGS
},
711 1, /* scalar_stmt_cost. */
712 1, /* scalar load_cost. */
713 1, /* scalar_store_cost. */
714 1, /* vec_stmt_cost. */
715 1, /* vec_to_scalar_cost. */
716 1, /* scalar_to_vec_cost. */
717 1, /* vec_align_load_cost. */
718 2, /* vec_unalign_load_cost. */
719 1, /* vec_store_cost. */
720 3, /* cond_taken_branch_cost. */
721 1, /* cond_not_taken_branch_cost. */
725 struct processor_costs pentiumpro_cost
= {
726 COSTS_N_INSNS (1), /* cost of an add instruction */
727 COSTS_N_INSNS (1), /* cost of a lea instruction */
728 COSTS_N_INSNS (1), /* variable shift costs */
729 COSTS_N_INSNS (1), /* constant shift costs */
730 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
731 COSTS_N_INSNS (4), /* HI */
732 COSTS_N_INSNS (4), /* SI */
733 COSTS_N_INSNS (4), /* DI */
734 COSTS_N_INSNS (4)}, /* other */
735 0, /* cost of multiply per each bit set */
736 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
737 COSTS_N_INSNS (17), /* HI */
738 COSTS_N_INSNS (17), /* SI */
739 COSTS_N_INSNS (17), /* DI */
740 COSTS_N_INSNS (17)}, /* other */
741 COSTS_N_INSNS (1), /* cost of movsx */
742 COSTS_N_INSNS (1), /* cost of movzx */
743 8, /* "large" insn */
745 2, /* cost for loading QImode using movzbl */
746 {4, 4, 4}, /* cost of loading integer registers
747 in QImode, HImode and SImode.
748 Relative to reg-reg move (2). */
749 {2, 2, 2}, /* cost of storing integer registers */
750 2, /* cost of reg,reg fld/fst */
751 {2, 2, 6}, /* cost of loading fp registers
752 in SFmode, DFmode and XFmode */
753 {4, 4, 6}, /* cost of storing fp registers
754 in SFmode, DFmode and XFmode */
755 2, /* cost of moving MMX register */
756 {2, 2}, /* cost of loading MMX registers
757 in SImode and DImode */
758 {2, 2}, /* cost of storing MMX registers
759 in SImode and DImode */
760 2, /* cost of moving SSE register */
761 {2, 2, 8}, /* cost of loading SSE registers
762 in SImode, DImode and TImode */
763 {2, 2, 8}, /* cost of storing SSE registers
764 in SImode, DImode and TImode */
765 3, /* MMX or SSE register to integer */
766 8, /* size of l1 cache. */
767 256, /* size of l2 cache */
768 32, /* size of prefetch block */
769 6, /* number of parallel prefetches */
771 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
772 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
773 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
774 COSTS_N_INSNS (2), /* cost of FABS instruction. */
775 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
776 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
777 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
778 (we ensure the alignment). For small blocks inline loop is still a
779 noticeable win, for bigger blocks either rep movsl or rep movsb is
780 way to go. Rep movsb has apparently more expensive startup time in CPU,
781 but after 4K the difference is down in the noise. */
782 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
783 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
784 DUMMY_STRINGOP_ALGS
},
785 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
786 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
787 DUMMY_STRINGOP_ALGS
},
788 1, /* scalar_stmt_cost. */
789 1, /* scalar load_cost. */
790 1, /* scalar_store_cost. */
791 1, /* vec_stmt_cost. */
792 1, /* vec_to_scalar_cost. */
793 1, /* scalar_to_vec_cost. */
794 1, /* vec_align_load_cost. */
795 2, /* vec_unalign_load_cost. */
796 1, /* vec_store_cost. */
797 3, /* cond_taken_branch_cost. */
798 1, /* cond_not_taken_branch_cost. */
802 struct processor_costs geode_cost
= {
803 COSTS_N_INSNS (1), /* cost of an add instruction */
804 COSTS_N_INSNS (1), /* cost of a lea instruction */
805 COSTS_N_INSNS (2), /* variable shift costs */
806 COSTS_N_INSNS (1), /* constant shift costs */
807 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
808 COSTS_N_INSNS (4), /* HI */
809 COSTS_N_INSNS (7), /* SI */
810 COSTS_N_INSNS (7), /* DI */
811 COSTS_N_INSNS (7)}, /* other */
812 0, /* cost of multiply per each bit set */
813 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
814 COSTS_N_INSNS (23), /* HI */
815 COSTS_N_INSNS (39), /* SI */
816 COSTS_N_INSNS (39), /* DI */
817 COSTS_N_INSNS (39)}, /* other */
818 COSTS_N_INSNS (1), /* cost of movsx */
819 COSTS_N_INSNS (1), /* cost of movzx */
820 8, /* "large" insn */
822 1, /* cost for loading QImode using movzbl */
823 {1, 1, 1}, /* cost of loading integer registers
824 in QImode, HImode and SImode.
825 Relative to reg-reg move (2). */
826 {1, 1, 1}, /* cost of storing integer registers */
827 1, /* cost of reg,reg fld/fst */
828 {1, 1, 1}, /* cost of loading fp registers
829 in SFmode, DFmode and XFmode */
830 {4, 6, 6}, /* cost of storing fp registers
831 in SFmode, DFmode and XFmode */
833 1, /* cost of moving MMX register */
834 {1, 1}, /* cost of loading MMX registers
835 in SImode and DImode */
836 {1, 1}, /* cost of storing MMX registers
837 in SImode and DImode */
838 1, /* cost of moving SSE register */
839 {1, 1, 1}, /* cost of loading SSE registers
840 in SImode, DImode and TImode */
841 {1, 1, 1}, /* cost of storing SSE registers
842 in SImode, DImode and TImode */
843 1, /* MMX or SSE register to integer */
844 64, /* size of l1 cache. */
845 128, /* size of l2 cache. */
846 32, /* size of prefetch block */
847 1, /* number of parallel prefetches */
849 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
850 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
851 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
852 COSTS_N_INSNS (1), /* cost of FABS instruction. */
853 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
854 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
855 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
856 DUMMY_STRINGOP_ALGS
},
857 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
858 DUMMY_STRINGOP_ALGS
},
859 1, /* scalar_stmt_cost. */
860 1, /* scalar load_cost. */
861 1, /* scalar_store_cost. */
862 1, /* vec_stmt_cost. */
863 1, /* vec_to_scalar_cost. */
864 1, /* scalar_to_vec_cost. */
865 1, /* vec_align_load_cost. */
866 2, /* vec_unalign_load_cost. */
867 1, /* vec_store_cost. */
868 3, /* cond_taken_branch_cost. */
869 1, /* cond_not_taken_branch_cost. */
873 struct processor_costs k6_cost
= {
874 COSTS_N_INSNS (1), /* cost of an add instruction */
875 COSTS_N_INSNS (2), /* cost of a lea instruction */
876 COSTS_N_INSNS (1), /* variable shift costs */
877 COSTS_N_INSNS (1), /* constant shift costs */
878 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
879 COSTS_N_INSNS (3), /* HI */
880 COSTS_N_INSNS (3), /* SI */
881 COSTS_N_INSNS (3), /* DI */
882 COSTS_N_INSNS (3)}, /* other */
883 0, /* cost of multiply per each bit set */
884 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
885 COSTS_N_INSNS (18), /* HI */
886 COSTS_N_INSNS (18), /* SI */
887 COSTS_N_INSNS (18), /* DI */
888 COSTS_N_INSNS (18)}, /* other */
889 COSTS_N_INSNS (2), /* cost of movsx */
890 COSTS_N_INSNS (2), /* cost of movzx */
891 8, /* "large" insn */
893 3, /* cost for loading QImode using movzbl */
894 {4, 5, 4}, /* cost of loading integer registers
895 in QImode, HImode and SImode.
896 Relative to reg-reg move (2). */
897 {2, 3, 2}, /* cost of storing integer registers */
898 4, /* cost of reg,reg fld/fst */
899 {6, 6, 6}, /* cost of loading fp registers
900 in SFmode, DFmode and XFmode */
901 {4, 4, 4}, /* cost of storing fp registers
902 in SFmode, DFmode and XFmode */
903 2, /* cost of moving MMX register */
904 {2, 2}, /* cost of loading MMX registers
905 in SImode and DImode */
906 {2, 2}, /* cost of storing MMX registers
907 in SImode and DImode */
908 2, /* cost of moving SSE register */
909 {2, 2, 8}, /* cost of loading SSE registers
910 in SImode, DImode and TImode */
911 {2, 2, 8}, /* cost of storing SSE registers
912 in SImode, DImode and TImode */
913 6, /* MMX or SSE register to integer */
914 32, /* size of l1 cache. */
915 32, /* size of l2 cache. Some models
916 have integrated l2 cache, but
917 optimizing for k6 is not important
918 enough to worry about that. */
919 32, /* size of prefetch block */
920 1, /* number of parallel prefetches */
922 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
923 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
924 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
925 COSTS_N_INSNS (2), /* cost of FABS instruction. */
926 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
927 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
928 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
929 DUMMY_STRINGOP_ALGS
},
930 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
931 DUMMY_STRINGOP_ALGS
},
932 1, /* scalar_stmt_cost. */
933 1, /* scalar load_cost. */
934 1, /* scalar_store_cost. */
935 1, /* vec_stmt_cost. */
936 1, /* vec_to_scalar_cost. */
937 1, /* scalar_to_vec_cost. */
938 1, /* vec_align_load_cost. */
939 2, /* vec_unalign_load_cost. */
940 1, /* vec_store_cost. */
941 3, /* cond_taken_branch_cost. */
942 1, /* cond_not_taken_branch_cost. */
946 struct processor_costs athlon_cost
= {
947 COSTS_N_INSNS (1), /* cost of an add instruction */
948 COSTS_N_INSNS (2), /* cost of a lea instruction */
949 COSTS_N_INSNS (1), /* variable shift costs */
950 COSTS_N_INSNS (1), /* constant shift costs */
951 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
952 COSTS_N_INSNS (5), /* HI */
953 COSTS_N_INSNS (5), /* SI */
954 COSTS_N_INSNS (5), /* DI */
955 COSTS_N_INSNS (5)}, /* other */
956 0, /* cost of multiply per each bit set */
957 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
958 COSTS_N_INSNS (26), /* HI */
959 COSTS_N_INSNS (42), /* SI */
960 COSTS_N_INSNS (74), /* DI */
961 COSTS_N_INSNS (74)}, /* other */
962 COSTS_N_INSNS (1), /* cost of movsx */
963 COSTS_N_INSNS (1), /* cost of movzx */
964 8, /* "large" insn */
966 4, /* cost for loading QImode using movzbl */
967 {3, 4, 3}, /* cost of loading integer registers
968 in QImode, HImode and SImode.
969 Relative to reg-reg move (2). */
970 {3, 4, 3}, /* cost of storing integer registers */
971 4, /* cost of reg,reg fld/fst */
972 {4, 4, 12}, /* cost of loading fp registers
973 in SFmode, DFmode and XFmode */
974 {6, 6, 8}, /* cost of storing fp registers
975 in SFmode, DFmode and XFmode */
976 2, /* cost of moving MMX register */
977 {4, 4}, /* cost of loading MMX registers
978 in SImode and DImode */
979 {4, 4}, /* cost of storing MMX registers
980 in SImode and DImode */
981 2, /* cost of moving SSE register */
982 {4, 4, 6}, /* cost of loading SSE registers
983 in SImode, DImode and TImode */
984 {4, 4, 5}, /* cost of storing SSE registers
985 in SImode, DImode and TImode */
986 5, /* MMX or SSE register to integer */
987 64, /* size of l1 cache. */
988 256, /* size of l2 cache. */
989 64, /* size of prefetch block */
990 6, /* number of parallel prefetches */
992 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
993 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
994 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
995 COSTS_N_INSNS (2), /* cost of FABS instruction. */
996 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
997 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
998 /* For some reason, Athlon deals better with REP prefix (relative to loops)
999 compared to K8. Alignment becomes important after 8 bytes for memcpy and
1000 128 bytes for memset. */
1001 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1002 DUMMY_STRINGOP_ALGS
},
1003 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1004 DUMMY_STRINGOP_ALGS
},
1005 1, /* scalar_stmt_cost. */
1006 1, /* scalar load_cost. */
1007 1, /* scalar_store_cost. */
1008 1, /* vec_stmt_cost. */
1009 1, /* vec_to_scalar_cost. */
1010 1, /* scalar_to_vec_cost. */
1011 1, /* vec_align_load_cost. */
1012 2, /* vec_unalign_load_cost. */
1013 1, /* vec_store_cost. */
1014 3, /* cond_taken_branch_cost. */
1015 1, /* cond_not_taken_branch_cost. */
/* Tuning cost table for the "k8" target; each field's meaning is given by
   the trailing comment on its initializer (see struct processor_costs).
   NOTE(review): this text is a mechanical re-extraction -- the integer at
   the start of most lines is the original source line number.  Gaps in
   that numbering (e.g. 1037 -> 1039) mark dropped source lines (the
   MOVE_RATIO entry, "static const" qualifiers, and the closing "};"
   appear to be among them).  Restore from a pristine tree before use.  */
1019 struct processor_costs k8_cost
= {
1020 COSTS_N_INSNS (1), /* cost of an add instruction */
1021 COSTS_N_INSNS (2), /* cost of a lea instruction */
1022 COSTS_N_INSNS (1), /* variable shift costs */
1023 COSTS_N_INSNS (1), /* constant shift costs */
1024 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1025 COSTS_N_INSNS (4), /* HI */
1026 COSTS_N_INSNS (3), /* SI */
1027 COSTS_N_INSNS (4), /* DI */
1028 COSTS_N_INSNS (5)}, /* other */
1029 0, /* cost of multiply per each bit set */
1030 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1031 COSTS_N_INSNS (26), /* HI */
1032 COSTS_N_INSNS (42), /* SI */
1033 COSTS_N_INSNS (74), /* DI */
1034 COSTS_N_INSNS (74)}, /* other */
1035 COSTS_N_INSNS (1), /* cost of movsx */
1036 COSTS_N_INSNS (1), /* cost of movzx */
1037 8, /* "large" insn */
1039 4, /* cost for loading QImode using movzbl */
1040 {3, 4, 3}, /* cost of loading integer registers
1041 in QImode, HImode and SImode.
1042 Relative to reg-reg move (2). */
1043 {3, 4, 3}, /* cost of storing integer registers */
1044 4, /* cost of reg,reg fld/fst */
1045 {4, 4, 12}, /* cost of loading fp registers
1046 in SFmode, DFmode and XFmode */
1047 {6, 6, 8}, /* cost of storing fp registers
1048 in SFmode, DFmode and XFmode */
1049 2, /* cost of moving MMX register */
1050 {3, 3}, /* cost of loading MMX registers
1051 in SImode and DImode */
1052 {4, 4}, /* cost of storing MMX registers
1053 in SImode and DImode */
1054 2, /* cost of moving SSE register */
1055 {4, 3, 6}, /* cost of loading SSE registers
1056 in SImode, DImode and TImode */
1057 {4, 4, 5}, /* cost of storing SSE registers
1058 in SImode, DImode and TImode */
1059 5, /* MMX or SSE register to integer */
1060 64, /* size of l1 cache. */
1061 512, /* size of l2 cache. */
1062 64, /* size of prefetch block */
1063 /* New AMD processors never drop prefetches; if they cannot be performed
1064 immediately, they are queued. We set number of simultaneous prefetches
1065 to a large constant to reflect this (it probably is not a good idea not
1066 to limit number of prefetches at all, as their execution also takes some
time).  */
1068 100, /* number of parallel prefetches */
1069 3, /* Branch cost */
1070 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1071 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1072 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1073 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1074 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1075 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1076 /* K8 has optimized REP instruction for medium sized blocks, but for very
1077 small blocks it is better to use loop. For large blocks, libcall can
1078 do nontemporary accesses and beat inline considerably. */
/* String-op algorithm tables: {max_size, algorithm} pairs, -1 = no limit;
   presumably the memcpy table followed by the memset table, each with a
   32-bit and a 64-bit variant -- confirm against struct processor_costs.  */
1079 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1080 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1081 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1082 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1083 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1084 4, /* scalar_stmt_cost. */
1085 2, /* scalar load_cost. */
1086 2, /* scalar_store_cost. */
1087 5, /* vec_stmt_cost. */
1088 0, /* vec_to_scalar_cost. */
1089 2, /* scalar_to_vec_cost. */
1090 2, /* vec_align_load_cost. */
1091 3, /* vec_unalign_load_cost. */
1092 3, /* vec_store_cost. */
1093 3, /* cond_taken_branch_cost. */
1094 2, /* cond_not_taken_branch_cost. */
/* Tuning cost table for the "amdfam10" target; field meanings are given by
   the trailing comments (see struct processor_costs).
   NOTE(review): mechanically re-extracted text -- leading integers are
   original source line numbers; gaps in them (e.g. 1115 -> 1117) mark
   dropped lines (MOVE_RATIO, "static const", the closing "};").  */
1097 struct processor_costs amdfam10_cost
= {
1098 COSTS_N_INSNS (1), /* cost of an add instruction */
1099 COSTS_N_INSNS (2), /* cost of a lea instruction */
1100 COSTS_N_INSNS (1), /* variable shift costs */
1101 COSTS_N_INSNS (1), /* constant shift costs */
1102 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1103 COSTS_N_INSNS (4), /* HI */
1104 COSTS_N_INSNS (3), /* SI */
1105 COSTS_N_INSNS (4), /* DI */
1106 COSTS_N_INSNS (5)}, /* other */
1107 0, /* cost of multiply per each bit set */
1108 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1109 COSTS_N_INSNS (35), /* HI */
1110 COSTS_N_INSNS (51), /* SI */
1111 COSTS_N_INSNS (83), /* DI */
1112 COSTS_N_INSNS (83)}, /* other */
1113 COSTS_N_INSNS (1), /* cost of movsx */
1114 COSTS_N_INSNS (1), /* cost of movzx */
1115 8, /* "large" insn */
1117 4, /* cost for loading QImode using movzbl */
1118 {3, 4, 3}, /* cost of loading integer registers
1119 in QImode, HImode and SImode.
1120 Relative to reg-reg move (2). */
1121 {3, 4, 3}, /* cost of storing integer registers */
1122 4, /* cost of reg,reg fld/fst */
1123 {4, 4, 12}, /* cost of loading fp registers
1124 in SFmode, DFmode and XFmode */
1125 {6, 6, 8}, /* cost of storing fp registers
1126 in SFmode, DFmode and XFmode */
1127 2, /* cost of moving MMX register */
1128 {3, 3}, /* cost of loading MMX registers
1129 in SImode and DImode */
1130 {4, 4}, /* cost of storing MMX registers
1131 in SImode and DImode */
1132 2, /* cost of moving SSE register */
1133 {4, 4, 3}, /* cost of loading SSE registers
1134 in SImode, DImode and TImode */
1135 {4, 4, 5}, /* cost of storing SSE registers
1136 in SImode, DImode and TImode */
1137 3, /* MMX or SSE register to integer */
/* NOTE(review): the latency annotation below lost its comment markers in
   extraction (and some of its lines -- see the numbering gaps);
   re-commented here so it reads as the annotation it originally was:
1139 MOVD reg64, xmmreg Double FSTORE 4
1140 MOVD reg32, xmmreg Double FSTORE 4
1142 MOVD reg64, xmmreg Double FADD 3
1144 MOVD reg32, xmmreg Double FADD 3
*/
1146 64, /* size of l1 cache. */
1147 512, /* size of l2 cache. */
1148 64, /* size of prefetch block */
1149 /* New AMD processors never drop prefetches; if they cannot be performed
1150 immediately, they are queued. We set number of simultaneous prefetches
1151 to a large constant to reflect this (it probably is not a good idea not
1152 to limit number of prefetches at all, as their execution also takes some
time).  */
1154 100, /* number of parallel prefetches */
1155 2, /* Branch cost */
1156 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1157 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1158 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1159 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1160 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1161 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1163 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
1164 very small blocks it is better to use loop. For large blocks, libcall can
1165 do nontemporary accesses and beat inline considerably. */
1166 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1167 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1168 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1169 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1170 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1171 4, /* scalar_stmt_cost. */
1172 2, /* scalar load_cost. */
1173 2, /* scalar_store_cost. */
1174 6, /* vec_stmt_cost. */
1175 0, /* vec_to_scalar_cost. */
1176 2, /* scalar_to_vec_cost. */
1177 2, /* vec_align_load_cost. */
1178 2, /* vec_unalign_load_cost. */
1179 2, /* vec_store_cost. */
1180 2, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
/* Tuning cost table for the "bdver1" target; field meanings are given by
   the trailing comments (see struct processor_costs).
   NOTE(review): mechanically re-extracted text -- leading integers are
   original source line numbers; gaps in them (e.g. 1202 -> 1204) mark
   dropped lines (MOVE_RATIO, "static const", the closing "};").  */
1184 struct processor_costs bdver1_cost
= {
1185 COSTS_N_INSNS (1), /* cost of an add instruction */
1186 COSTS_N_INSNS (1), /* cost of a lea instruction */
1187 COSTS_N_INSNS (1), /* variable shift costs */
1188 COSTS_N_INSNS (1), /* constant shift costs */
1189 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1190 COSTS_N_INSNS (4), /* HI */
1191 COSTS_N_INSNS (4), /* SI */
1192 COSTS_N_INSNS (6), /* DI */
1193 COSTS_N_INSNS (6)}, /* other */
1194 0, /* cost of multiply per each bit set */
1195 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1196 COSTS_N_INSNS (35), /* HI */
1197 COSTS_N_INSNS (51), /* SI */
1198 COSTS_N_INSNS (83), /* DI */
1199 COSTS_N_INSNS (83)}, /* other */
1200 COSTS_N_INSNS (1), /* cost of movsx */
1201 COSTS_N_INSNS (1), /* cost of movzx */
1202 8, /* "large" insn */
1204 4, /* cost for loading QImode using movzbl */
1205 {5, 5, 4}, /* cost of loading integer registers
1206 in QImode, HImode and SImode.
1207 Relative to reg-reg move (2). */
1208 {4, 4, 4}, /* cost of storing integer registers */
1209 2, /* cost of reg,reg fld/fst */
1210 {5, 5, 12}, /* cost of loading fp registers
1211 in SFmode, DFmode and XFmode */
1212 {4, 4, 8}, /* cost of storing fp registers
1213 in SFmode, DFmode and XFmode */
1214 2, /* cost of moving MMX register */
1215 {4, 4}, /* cost of loading MMX registers
1216 in SImode and DImode */
1217 {4, 4}, /* cost of storing MMX registers
1218 in SImode and DImode */
1219 2, /* cost of moving SSE register */
1220 {4, 4, 4}, /* cost of loading SSE registers
1221 in SImode, DImode and TImode */
1222 {4, 4, 4}, /* cost of storing SSE registers
1223 in SImode, DImode and TImode */
1224 2, /* MMX or SSE register to integer */
/* NOTE(review): the latency annotation below lost its comment markers in
   extraction (and some of its lines -- see the numbering gaps);
   re-commented here so it reads as the annotation it originally was:
1226 MOVD reg64, xmmreg Double FSTORE 4
1227 MOVD reg32, xmmreg Double FSTORE 4
1229 MOVD reg64, xmmreg Double FADD 3
1231 MOVD reg32, xmmreg Double FADD 3
*/
1233 16, /* size of l1 cache. */
1234 2048, /* size of l2 cache. */
1235 64, /* size of prefetch block */
1236 /* New AMD processors never drop prefetches; if they cannot be performed
1237 immediately, they are queued. We set number of simultaneous prefetches
1238 to a large constant to reflect this (it probably is not a good idea not
1239 to limit number of prefetches at all, as their execution also takes some
time).  */
1241 100, /* number of parallel prefetches */
1242 2, /* Branch cost */
1243 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1244 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1245 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1246 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1247 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1248 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1250 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
1251 very small blocks it is better to use loop. For large blocks, libcall
1252 can do nontemporary accesses and beat inline considerably. */
1253 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1254 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1255 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1256 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1257 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1258 6, /* scalar_stmt_cost. */
1259 4, /* scalar load_cost. */
1260 4, /* scalar_store_cost. */
1261 6, /* vec_stmt_cost. */
1262 0, /* vec_to_scalar_cost. */
1263 2, /* scalar_to_vec_cost. */
1264 4, /* vec_align_load_cost. */
1265 4, /* vec_unalign_load_cost. */
1266 4, /* vec_store_cost. */
1267 2, /* cond_taken_branch_cost. */
1268 1, /* cond_not_taken_branch_cost. */
/* Tuning cost table for the "pentium4" target; field meanings are given by
   the trailing comments (see struct processor_costs).
   NOTE(review): mechanically re-extracted text -- leading integers are
   original source line numbers; gaps in them (1290 -> 1292, 1326 -> 1328)
   mark dropped lines (MOVE_RATIO, a string-op terminator, "static const",
   the closing "};").  */
1272 struct processor_costs pentium4_cost
= {
1273 COSTS_N_INSNS (1), /* cost of an add instruction */
1274 COSTS_N_INSNS (3), /* cost of a lea instruction */
1275 COSTS_N_INSNS (4), /* variable shift costs */
1276 COSTS_N_INSNS (4), /* constant shift costs */
1277 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1278 COSTS_N_INSNS (15), /* HI */
1279 COSTS_N_INSNS (15), /* SI */
1280 COSTS_N_INSNS (15), /* DI */
1281 COSTS_N_INSNS (15)}, /* other */
1282 0, /* cost of multiply per each bit set */
1283 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1284 COSTS_N_INSNS (56), /* HI */
1285 COSTS_N_INSNS (56), /* SI */
1286 COSTS_N_INSNS (56), /* DI */
1287 COSTS_N_INSNS (56)}, /* other */
1288 COSTS_N_INSNS (1), /* cost of movsx */
1289 COSTS_N_INSNS (1), /* cost of movzx */
1290 16, /* "large" insn */
1292 2, /* cost for loading QImode using movzbl */
1293 {4, 5, 4}, /* cost of loading integer registers
1294 in QImode, HImode and SImode.
1295 Relative to reg-reg move (2). */
1296 {2, 3, 2}, /* cost of storing integer registers */
1297 2, /* cost of reg,reg fld/fst */
1298 {2, 2, 6}, /* cost of loading fp registers
1299 in SFmode, DFmode and XFmode */
1300 {4, 4, 6}, /* cost of storing fp registers
1301 in SFmode, DFmode and XFmode */
1302 2, /* cost of moving MMX register */
1303 {2, 2}, /* cost of loading MMX registers
1304 in SImode and DImode */
1305 {2, 2}, /* cost of storing MMX registers
1306 in SImode and DImode */
1307 12, /* cost of moving SSE register */
1308 {12, 12, 12}, /* cost of loading SSE registers
1309 in SImode, DImode and TImode */
1310 {2, 2, 8}, /* cost of storing SSE registers
1311 in SImode, DImode and TImode */
1312 10, /* MMX or SSE register to integer */
1313 8, /* size of l1 cache. */
1314 256, /* size of l2 cache. */
1315 64, /* size of prefetch block */
1316 6, /* number of parallel prefetches */
1317 2, /* Branch cost */
1318 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1319 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1320 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1321 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1322 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1323 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1324 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
1325 DUMMY_STRINGOP_ALGS
},
1326 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
/* NOTE(review): embedded numbering jumps 1326 -> 1328 here; the final
   "{-1, ...}" terminator line of this table appears to be missing.  */
1328 DUMMY_STRINGOP_ALGS
},
1329 1, /* scalar_stmt_cost. */
1330 1, /* scalar load_cost. */
1331 1, /* scalar_store_cost. */
1332 1, /* vec_stmt_cost. */
1333 1, /* vec_to_scalar_cost. */
1334 1, /* scalar_to_vec_cost. */
1335 1, /* vec_align_load_cost. */
1336 2, /* vec_unalign_load_cost. */
1337 1, /* vec_store_cost. */
1338 3, /* cond_taken_branch_cost. */
1339 1, /* cond_not_taken_branch_cost. */
/* Tuning cost table for the "nocona" target; field meanings are given by
   the trailing comments (see struct processor_costs).
   NOTE(review): mechanically re-extracted text -- leading integers are
   original source line numbers; gaps in them (e.g. 1398 -> 1400) mark
   dropped lines ("static const", a string-op terminator, the closing
   "};").  */
1343 struct processor_costs nocona_cost
= {
1344 COSTS_N_INSNS (1), /* cost of an add instruction */
1345 COSTS_N_INSNS (1), /* cost of a lea instruction */
1346 COSTS_N_INSNS (1), /* variable shift costs */
1347 COSTS_N_INSNS (1), /* constant shift costs */
1348 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1349 COSTS_N_INSNS (10), /* HI */
1350 COSTS_N_INSNS (10), /* SI */
1351 COSTS_N_INSNS (10), /* DI */
1352 COSTS_N_INSNS (10)}, /* other */
1353 0, /* cost of multiply per each bit set */
1354 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1355 COSTS_N_INSNS (66), /* HI */
1356 COSTS_N_INSNS (66), /* SI */
1357 COSTS_N_INSNS (66), /* DI */
1358 COSTS_N_INSNS (66)}, /* other */
1359 COSTS_N_INSNS (1), /* cost of movsx */
1360 COSTS_N_INSNS (1), /* cost of movzx */
1361 16, /* "large" insn */
1362 17, /* MOVE_RATIO */
1363 4, /* cost for loading QImode using movzbl */
1364 {4, 4, 4}, /* cost of loading integer registers
1365 in QImode, HImode and SImode.
1366 Relative to reg-reg move (2). */
1367 {4, 4, 4}, /* cost of storing integer registers */
1368 3, /* cost of reg,reg fld/fst */
1369 {12, 12, 12}, /* cost of loading fp registers
1370 in SFmode, DFmode and XFmode */
1371 {4, 4, 4}, /* cost of storing fp registers
1372 in SFmode, DFmode and XFmode */
1373 6, /* cost of moving MMX register */
1374 {12, 12}, /* cost of loading MMX registers
1375 in SImode and DImode */
1376 {12, 12}, /* cost of storing MMX registers
1377 in SImode and DImode */
1378 6, /* cost of moving SSE register */
1379 {12, 12, 12}, /* cost of loading SSE registers
1380 in SImode, DImode and TImode */
1381 {12, 12, 12}, /* cost of storing SSE registers
1382 in SImode, DImode and TImode */
1383 8, /* MMX or SSE register to integer */
1384 8, /* size of l1 cache. */
1385 1024, /* size of l2 cache. */
1386 128, /* size of prefetch block */
1387 8, /* number of parallel prefetches */
1388 1, /* Branch cost */
1389 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1390 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1391 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1392 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1393 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1394 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1395 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
1396 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
1397 {100000, unrolled_loop
}, {-1, libcall
}}}},
1398 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
/* NOTE(review): embedded numbering jumps 1398 -> 1400 here; the final
   "{-1, ...}" terminator line of this table appears to be missing.  */
1400 {libcall
, {{24, loop
}, {64, unrolled_loop
},
1401 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1402 1, /* scalar_stmt_cost. */
1403 1, /* scalar load_cost. */
1404 1, /* scalar_store_cost. */
1405 1, /* vec_stmt_cost. */
1406 1, /* vec_to_scalar_cost. */
1407 1, /* scalar_to_vec_cost. */
1408 1, /* vec_align_load_cost. */
1409 2, /* vec_unalign_load_cost. */
1410 1, /* vec_store_cost. */
1411 3, /* cond_taken_branch_cost. */
1412 1, /* cond_not_taken_branch_cost. */
/* Tuning cost table for the "atom" target; field meanings are given by
   the trailing comments (see struct processor_costs).
   NOTE(review): mechanically re-extracted text -- leading integers are
   original source line numbers; "static const" and the closing "};"
   appear to have been dropped.  */
1416 struct processor_costs atom_cost
= {
1417 COSTS_N_INSNS (1), /* cost of an add instruction */
1418 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1419 COSTS_N_INSNS (1), /* variable shift costs */
1420 COSTS_N_INSNS (1), /* constant shift costs */
1421 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1422 COSTS_N_INSNS (4), /* HI */
1423 COSTS_N_INSNS (3), /* SI */
1424 COSTS_N_INSNS (4), /* DI */
1425 COSTS_N_INSNS (2)}, /* other */
1426 0, /* cost of multiply per each bit set */
1427 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1428 COSTS_N_INSNS (26), /* HI */
1429 COSTS_N_INSNS (42), /* SI */
1430 COSTS_N_INSNS (74), /* DI */
1431 COSTS_N_INSNS (74)}, /* other */
1432 COSTS_N_INSNS (1), /* cost of movsx */
1433 COSTS_N_INSNS (1), /* cost of movzx */
1434 8, /* "large" insn */
1435 17, /* MOVE_RATIO */
1436 2, /* cost for loading QImode using movzbl */
1437 {4, 4, 4}, /* cost of loading integer registers
1438 in QImode, HImode and SImode.
1439 Relative to reg-reg move (2). */
1440 {4, 4, 4}, /* cost of storing integer registers */
1441 4, /* cost of reg,reg fld/fst */
1442 {12, 12, 12}, /* cost of loading fp registers
1443 in SFmode, DFmode and XFmode */
1444 {6, 6, 8}, /* cost of storing fp registers
1445 in SFmode, DFmode and XFmode */
1446 2, /* cost of moving MMX register */
1447 {8, 8}, /* cost of loading MMX registers
1448 in SImode and DImode */
1449 {8, 8}, /* cost of storing MMX registers
1450 in SImode and DImode */
1451 2, /* cost of moving SSE register */
1452 {8, 8, 8}, /* cost of loading SSE registers
1453 in SImode, DImode and TImode */
1454 {8, 8, 8}, /* cost of storing SSE registers
1455 in SImode, DImode and TImode */
1456 5, /* MMX or SSE register to integer */
1457 32, /* size of l1 cache. */
1458 256, /* size of l2 cache. */
1459 64, /* size of prefetch block */
1460 6, /* number of parallel prefetches */
1461 3, /* Branch cost */
1462 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1463 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1464 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1465 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1466 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1467 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1468 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
1469 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
1470 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1471 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
1472 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1473 {libcall
, {{24, loop
}, {32, unrolled_loop
},
1474 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1475 1, /* scalar_stmt_cost. */
1476 1, /* scalar load_cost. */
1477 1, /* scalar_store_cost. */
1478 1, /* vec_stmt_cost. */
1479 1, /* vec_to_scalar_cost. */
1480 1, /* scalar_to_vec_cost. */
1481 1, /* vec_align_load_cost. */
1482 2, /* vec_unalign_load_cost. */
1483 1, /* vec_store_cost. */
1484 3, /* cond_taken_branch_cost. */
1485 1, /* cond_not_taken_branch_cost. */
1488 /* Generic64 should produce code tuned for Nocona and K8. */
/* NOTE(review): mechanically re-extracted text -- leading integers are
   original source line numbers; "static const" and the closing "};"
   appear to have been dropped.  */
1490 struct processor_costs generic64_cost
= {
1491 COSTS_N_INSNS (1), /* cost of an add instruction */
1492 /* On all chips taken into consideration lea is 2 cycles and more. With
1493 this cost however our current implementation of synth_mult results in
1494 use of unnecessary temporary registers causing regression on several
1495 SPECfp benchmarks. */
1496 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1497 COSTS_N_INSNS (1), /* variable shift costs */
1498 COSTS_N_INSNS (1), /* constant shift costs */
1499 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1500 COSTS_N_INSNS (4), /* HI */
1501 COSTS_N_INSNS (3), /* SI */
1502 COSTS_N_INSNS (4), /* DI */
1503 COSTS_N_INSNS (2)}, /* other */
1504 0, /* cost of multiply per each bit set */
1505 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1506 COSTS_N_INSNS (26), /* HI */
1507 COSTS_N_INSNS (42), /* SI */
1508 COSTS_N_INSNS (74), /* DI */
1509 COSTS_N_INSNS (74)}, /* other */
1510 COSTS_N_INSNS (1), /* cost of movsx */
1511 COSTS_N_INSNS (1), /* cost of movzx */
1512 8, /* "large" insn */
1513 17, /* MOVE_RATIO */
1514 4, /* cost for loading QImode using movzbl */
1515 {4, 4, 4}, /* cost of loading integer registers
1516 in QImode, HImode and SImode.
1517 Relative to reg-reg move (2). */
1518 {4, 4, 4}, /* cost of storing integer registers */
1519 4, /* cost of reg,reg fld/fst */
1520 {12, 12, 12}, /* cost of loading fp registers
1521 in SFmode, DFmode and XFmode */
1522 {6, 6, 8}, /* cost of storing fp registers
1523 in SFmode, DFmode and XFmode */
1524 2, /* cost of moving MMX register */
1525 {8, 8}, /* cost of loading MMX registers
1526 in SImode and DImode */
1527 {8, 8}, /* cost of storing MMX registers
1528 in SImode and DImode */
1529 2, /* cost of moving SSE register */
1530 {8, 8, 8}, /* cost of loading SSE registers
1531 in SImode, DImode and TImode */
1532 {8, 8, 8}, /* cost of storing SSE registers
1533 in SImode, DImode and TImode */
1534 5, /* MMX or SSE register to integer */
1535 32, /* size of l1 cache. */
1536 512, /* size of l2 cache. */
1537 64, /* size of prefetch block */
1538 6, /* number of parallel prefetches */
1539 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1540 value is increased to perhaps more appropriate value of 5. */
1541 3, /* Branch cost */
1542 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1543 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1544 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1545 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1546 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1547 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1548 {DUMMY_STRINGOP_ALGS
,
1549 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1550 {DUMMY_STRINGOP_ALGS
,
1551 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1552 1, /* scalar_stmt_cost. */
1553 1, /* scalar load_cost. */
1554 1, /* scalar_store_cost. */
1555 1, /* vec_stmt_cost. */
1556 1, /* vec_to_scalar_cost. */
1557 1, /* scalar_to_vec_cost. */
1558 1, /* vec_align_load_cost. */
1559 2, /* vec_unalign_load_cost. */
1560 1, /* vec_store_cost. */
1561 3, /* cond_taken_branch_cost. */
1562 1, /* cond_not_taken_branch_cost. */
1565 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   ... (rest of the CPU list was lost in extraction).  */
/* NOTE(review): mechanically re-extracted text -- leading integers are
   original source line numbers; "static const" and the closing "};"
   appear to have been dropped.  */
1568 struct processor_costs generic32_cost
= {
1569 COSTS_N_INSNS (1), /* cost of an add instruction */
1570 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1571 COSTS_N_INSNS (1), /* variable shift costs */
1572 COSTS_N_INSNS (1), /* constant shift costs */
1573 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1574 COSTS_N_INSNS (4), /* HI */
1575 COSTS_N_INSNS (3), /* SI */
1576 COSTS_N_INSNS (4), /* DI */
1577 COSTS_N_INSNS (2)}, /* other */
1578 0, /* cost of multiply per each bit set */
1579 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1580 COSTS_N_INSNS (26), /* HI */
1581 COSTS_N_INSNS (42), /* SI */
1582 COSTS_N_INSNS (74), /* DI */
1583 COSTS_N_INSNS (74)}, /* other */
1584 COSTS_N_INSNS (1), /* cost of movsx */
1585 COSTS_N_INSNS (1), /* cost of movzx */
1586 8, /* "large" insn */
1587 17, /* MOVE_RATIO */
1588 4, /* cost for loading QImode using movzbl */
1589 {4, 4, 4}, /* cost of loading integer registers
1590 in QImode, HImode and SImode.
1591 Relative to reg-reg move (2). */
1592 {4, 4, 4}, /* cost of storing integer registers */
1593 4, /* cost of reg,reg fld/fst */
1594 {12, 12, 12}, /* cost of loading fp registers
1595 in SFmode, DFmode and XFmode */
1596 {6, 6, 8}, /* cost of storing fp registers
1597 in SFmode, DFmode and XFmode */
1598 2, /* cost of moving MMX register */
1599 {8, 8}, /* cost of loading MMX registers
1600 in SImode and DImode */
1601 {8, 8}, /* cost of storing MMX registers
1602 in SImode and DImode */
1603 2, /* cost of moving SSE register */
1604 {8, 8, 8}, /* cost of loading SSE registers
1605 in SImode, DImode and TImode */
1606 {8, 8, 8}, /* cost of storing SSE registers
1607 in SImode, DImode and TImode */
1608 5, /* MMX or SSE register to integer */
1609 32, /* size of l1 cache. */
1610 256, /* size of l2 cache. */
1611 64, /* size of prefetch block */
1612 6, /* number of parallel prefetches */
1613 3, /* Branch cost */
1614 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1615 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1616 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1617 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1618 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1619 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1620 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1621 DUMMY_STRINGOP_ALGS
},
1622 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1623 DUMMY_STRINGOP_ALGS
},
1624 1, /* scalar_stmt_cost. */
1625 1, /* scalar load_cost. */
1626 1, /* scalar_store_cost. */
1627 1, /* vec_stmt_cost. */
1628 1, /* vec_to_scalar_cost. */
1629 1, /* scalar_to_vec_cost. */
1630 1, /* vec_align_load_cost. */
1631 2, /* vec_unalign_load_cost. */
1632 1, /* vec_store_cost. */
1633 3, /* cond_taken_branch_cost. */
1634 1, /* cond_not_taken_branch_cost. */
/* Pointer to the cost table in effect; initialized to pentium_cost here
   (presumably re-pointed at the selected processor's table during option
   processing -- confirm against the rest of the file).  */
1637 const struct processor_costs
*ix86_cost
= &pentium_cost
;
1639 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator; the OR-ed aliases below name the
   processor groups used when filling the tuning tables that follow.
   NOTE(review): gaps in the embedded numbering (e.g. 1654 -> 1656)
   suggest a few #define lines were dropped in extraction.  */
1640 #define m_386 (1<<PROCESSOR_I386)
1641 #define m_486 (1<<PROCESSOR_I486)
1642 #define m_PENT (1<<PROCESSOR_PENTIUM)
1643 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1644 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1645 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1646 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1647 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1648 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1649 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1650 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1651 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1652 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1653 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1654 #define m_ATOM (1<<PROCESSOR_ATOM)
1656 #define m_GEODE (1<<PROCESSOR_GEODE)
1657 #define m_K6 (1<<PROCESSOR_K6)
1658 #define m_K6_GEODE (m_K6 | m_GEODE)
1659 #define m_K8 (1<<PROCESSOR_K8)
1660 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1661 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1662 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1663 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1664 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)
1666 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1667 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1669 /* Generic instruction choice should be common subset of supported CPUs
1670 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1671 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1673 /* Feature tests against the various tunings. */
/* ix86_tune_features[X86_TUNE_*] is nonzero when that tuning feature is
   enabled; presumably populated from initial_ix86_tune_features for the
   selected processor mask during option processing -- confirm against
   the rest of the file.  */
1674 unsigned char ix86_tune_features
[X86_TUNE_LAST
];
1676 /* Feature tests against the various tunings used to create ix86_tune_features
1677 based on the processor mask. */
1678 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
1679 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1680 negatively, so enabling for Generic64 seems like good code size
1681 tradeoff. We can't enable it for 32bit generic because it does not
1682 work well with PPro base chips. */
1683 m_386
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_CORE2I7_64
| m_GENERIC64
,
1685 /* X86_TUNE_PUSH_MEMORY */
1686 m_386
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_PENT4
1687 | m_NOCONA
| m_CORE2I7
| m_GENERIC
,
1689 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1692 /* X86_TUNE_UNROLL_STRLEN */
1693 m_486
| m_PENT
| m_ATOM
| m_PPRO
| m_AMD_MULTIPLE
| m_K6
1694 | m_CORE2I7
| m_GENERIC
,
1696 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1697 m_ATOM
| m_PPRO
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_PENT4
1698 | m_CORE2I7
| m_GENERIC
,
1700 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1701 on simulation result. But after P4 was made, no performance benefit
1702 was observed with branch hints. It also increases the code size.
1703 As a result, icc never generates branch hints. */
1706 /* X86_TUNE_DOUBLE_WITH_ADD */
1709 /* X86_TUNE_USE_SAHF */
1710 m_ATOM
| m_PPRO
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_BDVER1
| m_PENT4
1711 | m_NOCONA
| m_CORE2I7
| m_GENERIC
,
1713 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1714 partial dependencies. */
1715 m_AMD_MULTIPLE
| m_ATOM
| m_PPRO
| m_PENT4
| m_NOCONA
1716 | m_CORE2I7
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */,
1718 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1719 register stalls on Generic32 compilation setting as well. However
1720 in current implementation the partial register stalls are not eliminated
1721 very well - they can be introduced via subregs synthesized by combine
1722 and can happen in caller/callee saving sequences. Because this option
1723 pays back little on PPro based chips and is in conflict with partial reg
1724 dependencies used by Athlon/P4 based chips, it is better to leave it off
1725 for generic32 for now. */
1728 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1729 m_CORE2I7
| m_GENERIC
,
1731 /* X86_TUNE_USE_HIMODE_FIOP */
1732 m_386
| m_486
| m_K6_GEODE
,
1734 /* X86_TUNE_USE_SIMODE_FIOP */
1735 ~(m_PPRO
| m_AMD_MULTIPLE
| m_PENT
| m_ATOM
| m_CORE2I7
| m_GENERIC
),
1737 /* X86_TUNE_USE_MOV0 */
1740 /* X86_TUNE_USE_CLTD */
1741 ~(m_PENT
| m_ATOM
| m_K6
| m_CORE2I7
| m_GENERIC
),
1743 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1746 /* X86_TUNE_SPLIT_LONG_MOVES */
1749 /* X86_TUNE_READ_MODIFY_WRITE */
1752 /* X86_TUNE_READ_MODIFY */
1755 /* X86_TUNE_PROMOTE_QIMODE */
1756 m_K6_GEODE
| m_PENT
| m_ATOM
| m_386
| m_486
| m_AMD_MULTIPLE
1757 | m_CORE2I7
| m_GENERIC
/* | m_PENT4 ? */,
1759 /* X86_TUNE_FAST_PREFIX */
1760 ~(m_PENT
| m_486
| m_386
),
1762 /* X86_TUNE_SINGLE_STRINGOP */
1763 m_386
| m_PENT4
| m_NOCONA
,
1765 /* X86_TUNE_QIMODE_MATH */
1768 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1769 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1770 might be considered for Generic32 if our scheme for avoiding partial
1771 stalls was more effective. */
1774 /* X86_TUNE_PROMOTE_QI_REGS */
1777 /* X86_TUNE_PROMOTE_HI_REGS */
1780 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1781 over esp addition. */
1782 m_386
| m_486
| m_PENT
| m_PPRO
,
1784 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1785 over esp addition. */
1788 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1789 over esp subtraction. */
1790 m_386
| m_486
| m_PENT
| m_K6_GEODE
,
1792 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1793 over esp subtraction. */
1794 m_PENT
| m_K6_GEODE
,
1796 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1797 for DFmode copies */
1798 ~(m_AMD_MULTIPLE
| m_ATOM
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2I7
1799 | m_GENERIC
| m_GEODE
),
1801 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1802 m_AMD_MULTIPLE
| m_ATOM
| m_PENT4
| m_NOCONA
| m_CORE2I7
| m_GENERIC
,
1804 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1805 conflict here in between PPro/Pentium4 based chips that thread 128bit
1806 SSE registers as single units versus K8 based chips that divide SSE
1807 registers to two 64bit halves. This knob promotes all store destinations
1808 to be 128bit to allow register renaming on 128bit SSE units, but usually
1809 results in one extra microop on 64bit SSE units. Experimental results
1810 shows that disabling this option on P4 brings over 20% SPECfp regression,
1811 while enabling it on K8 brings roughly 2.4% regression that can be partly
1812 masked by careful scheduling of moves. */
1813 m_ATOM
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2I7
| m_GENERIC
1814 | m_AMDFAM10
| m_BDVER1
,
1816 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1817 m_AMDFAM10
| m_BDVER1
| m_COREI7
,
1819 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1820 m_BDVER1
| m_COREI7
,
1822 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1825 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1826 are resolved on SSE register parts instead of whole registers, so we may
1827 maintain just lower part of scalar values in proper format leaving the
1828 upper part undefined. */
1831 /* X86_TUNE_SSE_TYPELESS_STORES */
1834 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1835 m_PPRO
| m_PENT4
| m_NOCONA
,
1837 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1838 m_AMD_MULTIPLE
| m_ATOM
| m_PENT4
| m_NOCONA
| m_CORE2I7
| m_GENERIC
,
1840 /* X86_TUNE_PROLOGUE_USING_MOVE */
1841 m_ATHLON_K8
| m_ATOM
| m_PPRO
| m_CORE2I7
| m_GENERIC
,
1843 /* X86_TUNE_EPILOGUE_USING_MOVE */
1844 m_ATHLON_K8
| m_ATOM
| m_PPRO
| m_CORE2I7
| m_GENERIC
,
1846 /* X86_TUNE_SHIFT1 */
1849 /* X86_TUNE_USE_FFREEP */
1852 /* X86_TUNE_INTER_UNIT_MOVES */
1853 ~(m_AMD_MULTIPLE
| m_GENERIC
),
1855 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1856 ~(m_AMDFAM10
| m_BDVER1
),
1858 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1859 than 4 branch instructions in the 16 byte window. */
1860 m_ATOM
| m_PPRO
| m_AMD_MULTIPLE
| m_PENT4
| m_NOCONA
| m_CORE2I7
1863 /* X86_TUNE_SCHEDULE */
1864 m_PPRO
| m_AMD_MULTIPLE
| m_K6_GEODE
| m_PENT
| m_ATOM
| m_CORE2I7
1867 /* X86_TUNE_USE_BT */
1868 m_AMD_MULTIPLE
| m_ATOM
| m_CORE2I7
| m_GENERIC
,
1870 /* X86_TUNE_USE_INCDEC */
1871 ~(m_PENT4
| m_NOCONA
| m_CORE2I7
| m_GENERIC
| m_ATOM
),
1873 /* X86_TUNE_PAD_RETURNS */
1874 m_AMD_MULTIPLE
| m_CORE2I7
| m_GENERIC
,
1876 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function. */
1879 /* X86_TUNE_EXT_80387_CONSTANTS */
1880 m_K6_GEODE
| m_ATHLON_K8
| m_ATOM
| m_PENT4
| m_NOCONA
| m_PPRO
1881 | m_CORE2I7
| m_GENERIC
,
1883 /* X86_TUNE_SHORTEN_X87_SSE */
1886 /* X86_TUNE_AVOID_VECTOR_DECODE */
1887 m_K8
| m_CORE2I7_64
| m_GENERIC64
,
1889 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1890 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1893 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1894 vector path on AMD machines. */
1895 m_K8
| m_CORE2I7_64
| m_GENERIC64
| m_AMDFAM10
| m_BDVER1
,
1897 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1899 m_K8
| m_CORE2I7_64
| m_GENERIC64
| m_AMDFAM10
| m_BDVER1
,
1901 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1905 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1906 but one byte longer. */
1909 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1910 operand that cannot be represented using a modRM byte. The XOR
1911 replacement is long decoded, so this split helps here as well. */
1914 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1916 m_AMDFAM10
| m_CORE2I7
| m_GENERIC
,
1918 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1919 from integer to FP. */
1922 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1923 with a subsequent conditional jump instruction into a single
1924 compare-and-branch uop. */
1927 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1928 will impact LEA instruction selection. */
1931 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
1936 /* Feature tests against the various architecture variations. */
/* Filled in at option-processing time from initial_ix86_arch_features
   below — presumably masked by the bit of the selected processor;
   verify against the initialization code (not visible in this chunk). */
1937 unsigned char ix86_arch_features
[X86_ARCH_LAST
];
1939 /* Feature tests against the various architecture variations, used to create
1940 ix86_arch_features based on the processor mask. */
1941 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
1942 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1943 ~(m_386
| m_486
| m_PENT
| m_K6
),
1945 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
/* NOTE(review): the initializer values for the CMPXCHG, CMPXCHG8B, XADD
   and BSWAP entries, and the closing brace of this array, appear to have
   been lost in extraction — restore them from the upstream source.  */
1948 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1951 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1954 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* CPU masks for tuning decisions that are consulted directly rather than
   through the initial_ix86_tune_features table above.  */
1958 static const unsigned int x86_accumulate_outgoing_args
1959 = m_AMD_MULTIPLE
| m_ATOM
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2I7
/* NOTE(review): the end of this initializer (trailing terms and the
   terminating semicolon) appears to be missing — extraction artifact.  */
/* CPUs on which the 387 unit's transcendental instructions are always
   usable ("fancy math").  */
1962 static const unsigned int x86_arch_always_fancy_math_387
1963 = m_PENT
| m_ATOM
| m_PPRO
| m_AMD_MULTIPLE
| m_PENT4
1964 | m_NOCONA
| m_CORE2I7
| m_GENERIC
;
/* Selected string-operation strategy; no_stringop until an option
   overrides it (presumably via -mstringop-strategy — confirm).  */
1966 static enum stringop_alg stringop_alg
= no_stringop
;
1968 /* In case the average insn count for single function invocation is
1969 lower than this constant, emit fast (but longer) prologue and
epilogue code — threshold used by the prologue emission logic
(the rest of this comment was lost in extraction).  */
1971 #define FAST_PROLOGUE_INSN_COUNT 20
1973 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* The *_REGISTER_NAMES macros come from i386.h (not visible here).  */
1974 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1975 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1976 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
1978 /* Array of the smallest class containing reg number REGNO, indexed by
1979 REGNO. Used by REGNO_REG_CLASS in i386.h. */
/* NOTE(review): the opening '{' and closing '};' of this initializer
   appear to have been lost in extraction.  */
1981 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1983 /* ax, dx, cx, bx */
1984 AREG
, DREG
, CREG
, BREG
,
1985 /* si, di, bp, sp */
1986 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
/* FP stack registers, presumably st(0)..st(7) — verify against the
   register layout in i386.h.  */
1988 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1989 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1992 /* flags, fpsr, fpcr, frame */
1993 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
/* SSE registers xmm0..xmm7 (first one gets its own class).  */
1995 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
/* MMX registers.  */
1998 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
/* Extended integer registers r8-r15 (64-bit only, hence NON_Q).  */
2001 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2002 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2003 /* SSE REX registers */
2004 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2008 /* The "default" register map used in 32bit mode. */
/* Maps gcc hard-register numbers to debugger (DBX/stabs) register
   numbers.  NOTE(review): the '{'/'};'' of both initializers below
   appear to have been lost in extraction.  */
2010 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2012 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2013 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2014 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2015 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2016 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2017 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2018 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2021 /* The "default" register map used in 64bit mode. */
2023 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2025 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2026 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2027 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2028 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2029 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2030 8,9,10,11,12,13,14,15, /* extended integer registers */
2031 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2034 /* Define the register numbers to be used in Dwarf debugging information.
2035 The SVR4 reference port C compiler uses the following register numbers
2036 in its Dwarf output code:
2037 0 for %eax (gcc regno = 0)
2038 1 for %ecx (gcc regno = 2)
2039 2 for %edx (gcc regno = 1)
2040 3 for %ebx (gcc regno = 3)
2041 4 for %esp (gcc regno = 7)
2042 5 for %ebp (gcc regno = 6)
2043 6 for %esi (gcc regno = 4)
2044 7 for %edi (gcc regno = 5)
2045 The following three DWARF register numbers are never generated by
2046 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2047 believes these numbers have these meanings.
2048 8 for %eip (no gcc equivalent)
2049 9 for %eflags (gcc regno = 17)
2050 10 for %trapno (no gcc equivalent)
2051 It is not at all clear how we should number the FP stack registers
2052 for the x86 architecture. If the version of SDB on x86/svr4 were
2053 a bit less brain dead with respect to floating-point then we would
2054 have a precedent to follow with respect to DWARF register numbers
2055 for x86 FP registers, but the SDB on x86/svr4 is so completely
2056 broken with respect to FP registers that it is hardly worth thinking
2057 of it as something to strive for compatibility with.
2058 The version of x86/svr4 SDB I have at the moment does (partially)
2059 seem to believe that DWARF register number 11 is associated with
2060 the x86 register %st(0), but that's about all. Higher DWARF
2061 register numbers don't seem to be associated with anything in
2062 particular, and even for DWARF regno 11, SDB only seems to under-
2063 stand that it should say that a variable lives in %st(0) (when
2064 asked via an `=' command) if we said it was in DWARF regno 11,
2065 but SDB still prints garbage when asked for the value of the
2066 variable in question (via a `/' command).
2067 (Also note that the labels SDB prints for various FP stack regs
2068 when doing an `x' command are all wrong.)
2069 Note that these problems generally don't affect the native SVR4
2070 C compiler because it doesn't allow the use of -O with -g and
2071 because when it is *not* optimizing, it allocates a memory
2072 location for each floating-point variable, and the memory
2073 location is what gets described in the DWARF AT_location
2074 attribute for the variable in question.
2075 Regardless of the severe mental illness of the x86/svr4 SDB, we
2076 do something sensible here and we use the following DWARF
2077 register numbers. Note that these are all stack-top-relative
2079 11 for %st(0) (gcc regno = 8)
2080 12 for %st(1) (gcc regno = 9)
2081 13 for %st(2) (gcc regno = 10)
2082 14 for %st(3) (gcc regno = 11)
2083 15 for %st(4) (gcc regno = 12)
2084 16 for %st(5) (gcc regno = 13)
2085 17 for %st(6) (gcc regno = 14)
2086 18 for %st(7) (gcc regno = 15)
/* SVR4 DWARF register numbering; see the large comment above for the
   rationale behind each entry.  NOTE(review): initializer braces and
   the trailing '};' of the arrays below were lost in extraction.  */
2088 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2090 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2091 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2092 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2093 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2094 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2095 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2096 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2099 /* Define parameter passing and return registers. */
/* SysV AMD64 ABI: first six integer arguments.  */
2101 static int const x86_64_int_parameter_registers
[6] =
2103 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
/* Microsoft x64 ABI: first four integer arguments.  */
2106 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2108 CX_REG
, DX_REG
, R8_REG
, R9_REG
/* Integer return-value registers.  */
2111 static int const x86_64_int_return_registers
[4] =
2113 AX_REG
, DX_REG
, DI_REG
, SI_REG
2116 /* Define the structure for the machine field in struct function. */
/* Singly-linked list node describing one cached stack-local slot.
   NOTE(review): some members (and the closing '};') appear to be
   missing here — extraction artifact; compare with upstream.  */
2118 struct GTY(()) stack_local_entry
{
2119 unsigned short mode
;
2122 struct stack_local_entry
*next
;
2125 /* Structure describing stack frame layout.
2126 Stack grows downward:
2132 saved static chain if ix86_static_chain_on_stack
2134 saved frame pointer if frame_pointer_needed
2135 <- HARD_FRAME_POINTER
2141 <- sse_regs_save_offset
2144 [va_arg registers] |
2148 [padding2] | = to_allocate
/* NOTE(review): the struct header line (presumably "struct ix86_frame")
   and several members/diagram lines were lost in extraction.  */
2157 int outgoing_arguments_size
;
2158 HOST_WIDE_INT frame
;
2160 /* The offsets relative to ARG_POINTER. */
2161 HOST_WIDE_INT frame_pointer_offset
;
2162 HOST_WIDE_INT hard_frame_pointer_offset
;
2163 HOST_WIDE_INT stack_pointer_offset
;
2164 HOST_WIDE_INT hfp_save_offset
;
2165 HOST_WIDE_INT reg_save_offset
;
2166 HOST_WIDE_INT sse_reg_save_offset
;
2168 /* When save_regs_using_mov is set, emit prologue using
2169 move instead of push instructions. */
2170 bool save_regs_using_mov
;
2173 /* Code model option. */
2174 enum cmodel ix86_cmodel
;
/* Assembler syntax dialect; defaults to AT&T.  */
2176 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
/* Thread-local-storage dialect; defaults to the GNU scheme.  */
2178 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
2180 /* Which unit we are generating floating point math for. */
2181 enum fpmath_unit ix86_fpmath
;
2183 /* Which cpu are we scheduling for. */
2184 enum attr_cpu ix86_schedule
;
2186 /* Which cpu are we optimizing for. */
2187 enum processor_type ix86_tune
;
2189 /* Which instruction set architecture to use. */
2190 enum processor_type ix86_arch
;
2192 /* true if sse prefetch instruction is not NOOP. */
2193 int x86_prefetch_sse
;
2195 /* ix86_regparm_string as a number */
2196 static int ix86_regparm
;
2198 /* -mstackrealign option */
2199 static const char ix86_force_align_arg_pointer_string
[]
2200 = "force_align_arg_pointer";
/* Generator-function hooks selected per word size (SImode vs DImode
   insn patterns), presumably installed during option processing —
   confirm in the initialization code.  */
2202 static rtx (*ix86_gen_leave
) (void);
2203 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
2204 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
2205 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
2206 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
2207 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
2208 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
2209 static rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
2210 static rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
2211 static rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
2213 /* Preferred alignment for stack boundary in bits. */
2214 unsigned int ix86_preferred_stack_boundary
;
2216 /* Alignment for incoming stack boundary in bits specified at
command line (rest of comment lost in extraction).  */
2218 static unsigned int ix86_user_incoming_stack_boundary
;
2220 /* Default alignment for incoming stack boundary in bits. */
2221 static unsigned int ix86_default_incoming_stack_boundary
;
2223 /* Alignment for incoming stack boundary in bits. */
2224 unsigned int ix86_incoming_stack_boundary
;
2226 /* The abi used by target. */
2227 enum calling_abi ix86_abi
;
2229 /* Values 1-5: see jump.c */
2230 int ix86_branch_cost
;
2232 /* Calling abi specific va_list type nodes. */
2233 static GTY(()) tree sysv_va_list_type_node
;
2234 static GTY(()) tree ms_va_list_type_node
;
2236 /* Variables which are this size or smaller are put in the data/bss
2237 or ldata/lbss sections. */
2239 int ix86_section_threshold
= 65536;
2241 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2242 char internal_label_prefix
[16];
2243 int internal_label_prefix_len
;
2245 /* Fence to use after loop using movnt. */
/* NOTE(review): the variable declaration that this comment documents
   appears to have been lost in extraction.  */
2248 /* Register class used for passing given 64bit part of the argument.
2249 These represent classes as documented by the PS ABI, with the exception
2250 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2251 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2253 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2254 whenever possible (upper half does contain padding). */
/* NOTE(review): most enumerators of this enum, plus its braces, were
   lost in extraction; only a few constants remain below.  */
2255 enum x86_64_reg_class
2258 X86_64_INTEGER_CLASS
,
2259 X86_64_INTEGERSI_CLASS
,
2266 X86_64_COMPLEX_X87_CLASS
,
/* Maximum number of eightbyte classes per argument (psABI limit).  */
2270 #define MAX_CLASSES 4
2272 /* Table of constants used by fldpi, fldln2, etc.... */
2273 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
/* Lazily-initialized guard for the table above.  */
2274 static bool ext_80387_constants_init
= 0;
/* Forward declarations for file-local helpers defined later in this
   file.  NOTE(review): a few prototypes below are visibly truncated
   mid-parameter-list (extraction artifact).  */
2277 static struct machine_function
* ix86_init_machine_status (void);
2278 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2279 static bool ix86_function_value_regno_p (const unsigned int);
2280 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2282 static rtx
ix86_static_chain (const_tree
, bool);
2283 static int ix86_function_regparm (const_tree
, const_tree
);
2284 static void ix86_compute_frame_layout (struct ix86_frame
*);
2285 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2287 static void ix86_add_new_builtins (int);
2288 static rtx
ix86_expand_vec_perm_builtin (tree
);
2289 static tree
ix86_canonical_va_list_type (tree
);
2290 static void predict_jump (int);
2291 static unsigned int split_stack_prologue_scratch_regno (void);
2292 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
/* Indices of the strings saved/restored by the target-attribute
   machinery (arch, tune, fpmath).  */
2294 enum ix86_function_specific_strings
2296 IX86_FUNCTION_SPECIFIC_ARCH
,
2297 IX86_FUNCTION_SPECIFIC_TUNE
,
2298 IX86_FUNCTION_SPECIFIC_FPMATH
,
2299 IX86_FUNCTION_SPECIFIC_MAX
2302 static char *ix86_target_string (int, int, const char *, const char *,
2303 const char *, bool);
2304 static void ix86_debug_options (void) ATTRIBUTE_UNUSED
;
2305 static void ix86_function_specific_save (struct cl_target_option
*);
2306 static void ix86_function_specific_restore (struct cl_target_option
*);
2307 static void ix86_function_specific_print (FILE *, int,
2308 struct cl_target_option
*);
2309 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2310 static bool ix86_valid_target_attribute_inner_p (tree
, char *[]);
2311 static bool ix86_can_inline_p (tree
, tree
);
2312 static void ix86_set_current_function (tree
);
2313 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2315 static enum calling_abi
ix86_function_abi (const_tree
);
/* Overridable subtarget defaults.  NOTE(review): the matching #endif
   lines for both #ifndef blocks below were lost in extraction.  */
2318 #ifndef SUBTARGET32_DEFAULT_CPU
2319 #define SUBTARGET32_DEFAULT_CPU "i386"
2322 /* The svr4 ABI for the i386 says that records and unions are returned
in memory (rest of comment lost in extraction).  */
2324 #ifndef DEFAULT_PCC_STRUCT_RETURN
2325 #define DEFAULT_PCC_STRUCT_RETURN 1
2328 /* Whether -mtune= or -march= were specified */
2329 static int ix86_tune_defaulted
;
2330 static int ix86_arch_specified
;
2332 /* A mask of ix86_isa_flags that includes bit X if X
2333 was set or cleared on the command line. */
2334 static int ix86_isa_flags_explicit
;
2336 /* Define a set of ISAs which are available when a given ISA is
2337 enabled. MMX and SSE ISAs are handled separately. */
/* Each *_SET mask OR-s in the masks of every ISA the feature implies,
   so enabling e.g. SSE4.2 transitively enables SSE4.1 down to SSE.  */
2339 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2340 #define OPTION_MASK_ISA_3DNOW_SET \
2341 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2343 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2344 #define OPTION_MASK_ISA_SSE2_SET \
2345 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2346 #define OPTION_MASK_ISA_SSE3_SET \
2347 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2348 #define OPTION_MASK_ISA_SSSE3_SET \
2349 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2350 #define OPTION_MASK_ISA_SSE4_1_SET \
2351 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2352 #define OPTION_MASK_ISA_SSE4_2_SET \
2353 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2354 #define OPTION_MASK_ISA_AVX_SET \
2355 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2356 #define OPTION_MASK_ISA_FMA_SET \
2357 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2359 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2 (continuation lost in extraction).  */
2361 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2363 #define OPTION_MASK_ISA_SSE4A_SET \
2364 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2365 #define OPTION_MASK_ISA_FMA4_SET \
2366 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2367 | OPTION_MASK_ISA_AVX_SET)
2368 #define OPTION_MASK_ISA_XOP_SET \
2369 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
/* NOTE(review): the replacement list of OPTION_MASK_ISA_LWP_SET (the
   line after the backslash continuation) was lost in extraction.  */
2370 #define OPTION_MASK_ISA_LWP_SET \
2373 /* AES and PCLMUL need SSE2 because they use xmm registers */
2374 #define OPTION_MASK_ISA_AES_SET \
2375 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2376 #define OPTION_MASK_ISA_PCLMUL_SET \
2377 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2379 #define OPTION_MASK_ISA_ABM_SET \
2380 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2382 #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
2383 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
2384 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2385 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2386 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2387 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2388 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2390 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2391 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2392 #define OPTION_MASK_ISA_F16C_SET \
2393 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
2395 /* Define a set of ISAs which aren't available when a given ISA is
2396 disabled. MMX and SSE ISAs are handled separately. */
/* Each *_UNSET mask is the inverse closure of the *_SET masks above:
   disabling a feature also disables everything that depends on it.  */
2398 #define OPTION_MASK_ISA_MMX_UNSET \
2399 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2400 #define OPTION_MASK_ISA_3DNOW_UNSET \
2401 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2402 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2404 #define OPTION_MASK_ISA_SSE_UNSET \
2405 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2406 #define OPTION_MASK_ISA_SSE2_UNSET \
2407 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2408 #define OPTION_MASK_ISA_SSE3_UNSET \
2409 (OPTION_MASK_ISA_SSE3 \
2410 | OPTION_MASK_ISA_SSSE3_UNSET \
2411 | OPTION_MASK_ISA_SSE4A_UNSET )
2412 #define OPTION_MASK_ISA_SSSE3_UNSET \
2413 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2414 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2415 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2416 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2417 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2418 #define OPTION_MASK_ISA_AVX_UNSET \
2419 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2420 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2421 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2423 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
as -mno-sse4.1 (continuation lost in extraction).  */
2425 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2427 #define OPTION_MASK_ISA_SSE4A_UNSET \
2428 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2430 #define OPTION_MASK_ISA_FMA4_UNSET \
2431 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2432 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2433 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2435 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2436 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2437 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2438 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
2439 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
2440 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2441 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2442 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2443 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2444 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2446 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2447 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2448 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
2450 /* Vectorization library interface and handlers. */
/* Dispatch pointer selecting the active vector-math-library ABI
   (SVML or ACML below); presumably installed during option
   processing — confirm in the initialization code.  */
2451 static tree (*ix86_veclib_handler
) (enum built_in_function
, tree
, tree
);
2453 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2454 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
2456 /* Processor target table, indexed by processor number */
/* NOTE(review): the "struct ptt {" header line for the members below,
   and the braces around both initializers, were lost in extraction.  */
2459 const struct processor_costs
*cost
; /* Processor costs */
2460 const int align_loop
; /* Default alignments. */
2461 const int align_loop_max_skip
;
2462 const int align_jump
;
2463 const int align_jump_max_skip
;
2464 const int align_func
;
/* One row per PROCESSOR_* value: costs table plus the five default
   alignment parameters declared above, in the same order.  */
2467 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2469 {&i386_cost
, 4, 3, 4, 3, 4},
2470 {&i486_cost
, 16, 15, 16, 15, 16},
2471 {&pentium_cost
, 16, 7, 16, 7, 16},
2472 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2473 {&geode_cost
, 0, 0, 0, 0, 0},
2474 {&k6_cost
, 32, 7, 32, 7, 32},
2475 {&athlon_cost
, 16, 7, 16, 7, 16},
2476 {&pentium4_cost
, 0, 0, 0, 0, 0},
2477 {&k8_cost
, 16, 7, 16, 7, 16},
2478 {&nocona_cost
, 0, 0, 0, 0, 0},
2479 /* Core 2 32-bit. */
2480 {&generic32_cost
, 16, 10, 16, 10, 16},
2481 /* Core 2 64-bit. */
2482 {&generic64_cost
, 16, 10, 16, 10, 16},
2483 /* Core i7 32-bit. */
2484 {&generic32_cost
, 16, 10, 16, 10, 16},
2485 /* Core i7 64-bit. */
2486 {&generic64_cost
, 16, 10, 16, 10, 16},
2487 {&generic32_cost
, 16, 7, 16, 7, 16},
2488 {&generic64_cost
, 16, 10, 16, 10, 16},
2489 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2490 {&bdver1_cost
, 32, 24, 32, 7, 32},
2491 {&atom_cost
, 16, 7, 16, 7, 16}
/* NOTE(review): the initializer of cpu_names was lost in extraction.  */
2494 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
2522 /* Return true if a red-zone is in use. */
/* The red zone exists on 64-bit non-Windows ABIs only: it requires
   TARGET_RED_ZONE and is excluded under the MS x64 ABI.
   NOTE(review): the return-type line and function braces were lost
   in extraction.  */
2525 ix86_using_red_zone (void)
2527 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2530 /* Implement TARGET_HANDLE_OPTION. */
2533 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
2540 ix86_isa_flags
|= OPTION_MASK_ISA_MMX_SET
;
2541 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MMX_SET
;
2545 ix86_isa_flags
&= ~OPTION_MASK_ISA_MMX_UNSET
;
2546 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MMX_UNSET
;
2553 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_SET
;
2554 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_3DNOW_SET
;
2558 ix86_isa_flags
&= ~OPTION_MASK_ISA_3DNOW_UNSET
;
2559 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_3DNOW_UNSET
;
2569 ix86_isa_flags
|= OPTION_MASK_ISA_SSE_SET
;
2570 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE_SET
;
2574 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE_UNSET
;
2575 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE_UNSET
;
2582 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2_SET
;
2583 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE2_SET
;
2587 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE2_UNSET
;
2588 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE2_UNSET
;
2595 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3_SET
;
2596 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE3_SET
;
2600 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE3_UNSET
;
2601 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE3_UNSET
;
2608 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3_SET
;
2609 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSSE3_SET
;
2613 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSSE3_UNSET
;
2614 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSSE3_UNSET
;
2621 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1_SET
;
2622 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_1_SET
;
2626 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_1_UNSET
;
2627 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_1_UNSET
;
2634 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2_SET
;
2635 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_2_SET
;
2639 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_2_UNSET
;
2640 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_2_UNSET
;
2647 ix86_isa_flags
|= OPTION_MASK_ISA_AVX_SET
;
2648 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_AVX_SET
;
2652 ix86_isa_flags
&= ~OPTION_MASK_ISA_AVX_UNSET
;
2653 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_AVX_UNSET
;
2660 ix86_isa_flags
|= OPTION_MASK_ISA_FMA_SET
;
2661 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_FMA_SET
;
2665 ix86_isa_flags
&= ~OPTION_MASK_ISA_FMA_UNSET
;
2666 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_FMA_UNSET
;
2671 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_SET
;
2672 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_SET
;
2676 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_UNSET
;
2677 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_UNSET
;
2683 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A_SET
;
2684 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4A_SET
;
2688 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4A_UNSET
;
2689 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4A_UNSET
;
2696 ix86_isa_flags
|= OPTION_MASK_ISA_FMA4_SET
;
2697 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_FMA4_SET
;
2701 ix86_isa_flags
&= ~OPTION_MASK_ISA_FMA4_UNSET
;
2702 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_FMA4_UNSET
;
2709 ix86_isa_flags
|= OPTION_MASK_ISA_XOP_SET
;
2710 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_XOP_SET
;
2714 ix86_isa_flags
&= ~OPTION_MASK_ISA_XOP_UNSET
;
2715 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_XOP_UNSET
;
2722 ix86_isa_flags
|= OPTION_MASK_ISA_LWP_SET
;
2723 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_LWP_SET
;
2727 ix86_isa_flags
&= ~OPTION_MASK_ISA_LWP_UNSET
;
2728 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_LWP_UNSET
;
2735 ix86_isa_flags
|= OPTION_MASK_ISA_ABM_SET
;
2736 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_ABM_SET
;
2740 ix86_isa_flags
&= ~OPTION_MASK_ISA_ABM_UNSET
;
2741 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_ABM_UNSET
;
2748 ix86_isa_flags
|= OPTION_MASK_ISA_BMI_SET
;
2749 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_BMI_SET
;
2753 ix86_isa_flags
&= ~OPTION_MASK_ISA_BMI_UNSET
;
2754 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_BMI_UNSET
;
2761 ix86_isa_flags
|= OPTION_MASK_ISA_TBM_SET
;
2762 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_TBM_SET
;
2766 ix86_isa_flags
&= ~OPTION_MASK_ISA_TBM_UNSET
;
2767 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_TBM_UNSET
;
2774 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT_SET
;
2775 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_POPCNT_SET
;
2779 ix86_isa_flags
&= ~OPTION_MASK_ISA_POPCNT_UNSET
;
2780 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_POPCNT_UNSET
;
2787 ix86_isa_flags
|= OPTION_MASK_ISA_SAHF_SET
;
2788 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SAHF_SET
;
2792 ix86_isa_flags
&= ~OPTION_MASK_ISA_SAHF_UNSET
;
2793 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SAHF_UNSET
;
2800 ix86_isa_flags
|= OPTION_MASK_ISA_CX16_SET
;
2801 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_CX16_SET
;
2805 ix86_isa_flags
&= ~OPTION_MASK_ISA_CX16_UNSET
;
2806 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_CX16_UNSET
;
2813 ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE_SET
;
2814 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MOVBE_SET
;
2818 ix86_isa_flags
&= ~OPTION_MASK_ISA_MOVBE_UNSET
;
2819 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MOVBE_UNSET
;
2826 ix86_isa_flags
|= OPTION_MASK_ISA_CRC32_SET
;
2827 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_CRC32_SET
;
2831 ix86_isa_flags
&= ~OPTION_MASK_ISA_CRC32_UNSET
;
2832 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_CRC32_UNSET
;
2839 ix86_isa_flags
|= OPTION_MASK_ISA_AES_SET
;
2840 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_AES_SET
;
2844 ix86_isa_flags
&= ~OPTION_MASK_ISA_AES_UNSET
;
2845 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_AES_UNSET
;
2852 ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL_SET
;
2853 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_PCLMUL_SET
;
2857 ix86_isa_flags
&= ~OPTION_MASK_ISA_PCLMUL_UNSET
;
2858 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_PCLMUL_UNSET
;
2865 ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE_SET
;
2866 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_FSGSBASE_SET
;
2870 ix86_isa_flags
&= ~OPTION_MASK_ISA_FSGSBASE_UNSET
;
2871 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_FSGSBASE_UNSET
;
2878 ix86_isa_flags
|= OPTION_MASK_ISA_RDRND_SET
;
2879 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_RDRND_SET
;
2883 ix86_isa_flags
&= ~OPTION_MASK_ISA_RDRND_UNSET
;
2884 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_RDRND_UNSET
;
2891 ix86_isa_flags
|= OPTION_MASK_ISA_F16C_SET
;
2892 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_F16C_SET
;
2896 ix86_isa_flags
&= ~OPTION_MASK_ISA_F16C_UNSET
;
2897 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_F16C_UNSET
;
2906 /* Return a string that documents the current -m options. The caller is
2907 responsible for freeing the string. */
2910 ix86_target_string (int isa
, int flags
, const char *arch
, const char *tune
,
2911 const char *fpmath
, bool add_nl_p
)
2913 struct ix86_target_opts
2915 const char *option
; /* option string */
2916 int mask
; /* isa mask options */
2919 /* This table is ordered so that options like -msse4.2 that imply
2920 preceding options are matched first. */
2921 static struct ix86_target_opts isa_opts
[] =
2923 { "-m64", OPTION_MASK_ISA_64BIT
},
2924 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2925 { "-mfma", OPTION_MASK_ISA_FMA
},
2926 { "-mxop", OPTION_MASK_ISA_XOP
},
2927 { "-mlwp", OPTION_MASK_ISA_LWP
},
2928 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2929 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2930 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2931 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2932 { "-msse3", OPTION_MASK_ISA_SSE3
},
2933 { "-msse2", OPTION_MASK_ISA_SSE2
},
2934 { "-msse", OPTION_MASK_ISA_SSE
},
2935 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2936 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2937 { "-mmmx", OPTION_MASK_ISA_MMX
},
2938 { "-mabm", OPTION_MASK_ISA_ABM
},
2939 { "-mbmi", OPTION_MASK_ISA_BMI
},
2940 { "-mtbm", OPTION_MASK_ISA_TBM
},
2941 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2942 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2943 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2944 { "-maes", OPTION_MASK_ISA_AES
},
2945 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2946 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2947 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2948 { "-mf16c", OPTION_MASK_ISA_F16C
},
2952 static struct ix86_target_opts flag_opts
[] =
2954 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2955 { "-m80387", MASK_80387
},
2956 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2957 { "-malign-double", MASK_ALIGN_DOUBLE
},
2958 { "-mcld", MASK_CLD
},
2959 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2960 { "-mieee-fp", MASK_IEEE_FP
},
2961 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2962 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2963 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2964 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2965 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2966 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2967 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2968 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2969 { "-mrecip", MASK_RECIP
},
2970 { "-mrtd", MASK_RTD
},
2971 { "-msseregparm", MASK_SSEREGPARM
},
2972 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2973 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2974 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2975 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2976 { "-mvzeroupper", MASK_VZEROUPPER
},
2979 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2982 char target_other
[40];
2991 memset (opts
, '\0', sizeof (opts
));
2993 /* Add -march= option. */
2996 opts
[num
][0] = "-march=";
2997 opts
[num
++][1] = arch
;
3000 /* Add -mtune= option. */
3003 opts
[num
][0] = "-mtune=";
3004 opts
[num
++][1] = tune
;
3007 /* Pick out the options in isa options. */
3008 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
3010 if ((isa
& isa_opts
[i
].mask
) != 0)
3012 opts
[num
++][0] = isa_opts
[i
].option
;
3013 isa
&= ~ isa_opts
[i
].mask
;
3017 if (isa
&& add_nl_p
)
3019 opts
[num
++][0] = isa_other
;
3020 sprintf (isa_other
, "(other isa: %#x)", isa
);
3023 /* Add flag options. */
3024 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
3026 if ((flags
& flag_opts
[i
].mask
) != 0)
3028 opts
[num
++][0] = flag_opts
[i
].option
;
3029 flags
&= ~ flag_opts
[i
].mask
;
3033 if (flags
&& add_nl_p
)
3035 opts
[num
++][0] = target_other
;
3036 sprintf (target_other
, "(other flags: %#x)", flags
);
3039 /* Add -fpmath= option. */
3042 opts
[num
][0] = "-mfpmath=";
3043 opts
[num
++][1] = fpmath
;
3050 gcc_assert (num
< ARRAY_SIZE (opts
));
3052 /* Size the string. */
3054 sep_len
= (add_nl_p
) ? 3 : 1;
3055 for (i
= 0; i
< num
; i
++)
3058 for (j
= 0; j
< 2; j
++)
3060 len
+= strlen (opts
[i
][j
]);
3063 /* Build the string. */
3064 ret
= ptr
= (char *) xmalloc (len
);
3067 for (i
= 0; i
< num
; i
++)
3071 for (j
= 0; j
< 2; j
++)
3072 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
3079 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
3087 for (j
= 0; j
< 2; j
++)
3090 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
3092 line_len
+= len2
[j
];
3097 gcc_assert (ret
+ len
>= ptr
);
3102 /* Return TRUE if software prefetching is beneficial for the
/* NOTE(review): mangled extraction — the switch header, return
   statements and braces of this predicate were dropped; only the case
   labels of processors answering "true" remain visible.  Presumably it
   switches on the active tuning — TODO confirm against upstream.  */
3106 software_prefetching_beneficial_p (void)
3110 case PROCESSOR_GEODE
:
3112 case PROCESSOR_ATHLON
:
3114 case PROCESSOR_AMDFAM10
:
3122 /* Return true, if profiling code should be emitted before
3123 prologue. Otherwise it returns false.
3124 Note: For x86 with "hotfix" it is sorried. */
/* NOTE(review): answer is simply whether -mfentry-style profiling
   (flag_fentry) is in effect; surrounding braces were dropped by the
   extraction.  */
3126 ix86_profile_before_prologue (void)
3128 return flag_fentry
!= 0;
3131 /* Function that is callable from the debugger to print the current
/* NOTE(review): debugger helper — builds the current -m option string
   via ix86_target_string and prints it to stderr; the if/else around
   the two print calls (and the free of the string) was dropped by the
   extraction.  */
3134 ix86_debug_options (void)
3136 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
3137 ix86_arch_string
, ix86_tune_string
,
3138 ix86_fpmath_string
, true);
3142 fprintf (stderr
, "%s\n\n", opts
);
/* Fallback when no option string was produced.  */
3146 fputs ("<no options>\n\n", stderr
);
3151 /* Override various settings based on options. If MAIN_ARGS_P, the
3152 options are from the command line, otherwise they are from
3156 ix86_option_override_internal (bool main_args_p
)
3159 unsigned int ix86_arch_mask
, ix86_tune_mask
;
3160 const bool ix86_tune_specified
= (ix86_tune_string
!= NULL
);
3165 /* Comes from final.c -- no real reason to change it. */
3166 #define MAX_CODE_ALIGN 16
3174 PTA_PREFETCH_SSE
= 1 << 4,
3176 PTA_3DNOW_A
= 1 << 6,
3180 PTA_POPCNT
= 1 << 10,
3182 PTA_SSE4A
= 1 << 12,
3183 PTA_NO_SAHF
= 1 << 13,
3184 PTA_SSE4_1
= 1 << 14,
3185 PTA_SSE4_2
= 1 << 15,
3187 PTA_PCLMUL
= 1 << 17,
3190 PTA_MOVBE
= 1 << 20,
3194 PTA_FSGSBASE
= 1 << 24,
3195 PTA_RDRND
= 1 << 25,
3199 /* if this reaches 32, need to widen struct pta flags below */
3204 const char *const name
; /* processor name or nickname. */
3205 const enum processor_type processor
;
3206 const enum attr_cpu schedule
;
3207 const unsigned /*enum pta_flags*/ flags
;
3209 const processor_alias_table
[] =
3211 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
3212 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
3213 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3214 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3215 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
3216 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
3217 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
3218 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
3219 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
3220 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3221 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3222 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
},
3223 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3225 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3227 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3228 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
3229 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
3230 PTA_MMX
|PTA_SSE
| PTA_SSE2
},
3231 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
3232 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
3233 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
3234 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
3235 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
3236 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3237 | PTA_CX16
| PTA_NO_SAHF
},
3238 {"core2", PROCESSOR_CORE2_64
, CPU_CORE2
,
3239 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3240 | PTA_SSSE3
| PTA_CX16
},
3241 {"corei7", PROCESSOR_COREI7_64
, CPU_COREI7
,
3242 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3243 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
},
3244 {"corei7-avx", PROCESSOR_COREI7_64
, CPU_COREI7
,
3245 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3246 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3247 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
},
3248 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
3249 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3250 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
},
3251 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3252 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
|PTA_PREFETCH_SSE
},
3253 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3254 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3255 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3256 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3257 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3258 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3259 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3260 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3261 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3262 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3263 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3264 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3265 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3266 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3267 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
3268 {"k8", PROCESSOR_K8
, CPU_K8
,
3269 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3270 | PTA_SSE2
| PTA_NO_SAHF
},
3271 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3272 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3273 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3274 {"opteron", PROCESSOR_K8
, CPU_K8
,
3275 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3276 | PTA_SSE2
| PTA_NO_SAHF
},
3277 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3278 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3279 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3280 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3281 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3282 | PTA_SSE2
| PTA_NO_SAHF
},
3283 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3284 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3285 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3286 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3287 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3288 | PTA_SSE2
| PTA_NO_SAHF
},
3289 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3290 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3291 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3292 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3293 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3294 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3295 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3296 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3297 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
3298 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AES
3299 | PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
| PTA_XOP
| PTA_LWP
},
3300 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3301 0 /* flags are only used for -march switch. */ },
3302 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3303 PTA_64BIT
/* flags are only used for -march switch. */ },
3306 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
3308 /* Set up prefix/suffix so the error messages refer to either the command
3309 line argument, or the attribute(target). */
3318 prefix
= "option(\"";
3323 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3324 SUBTARGET_OVERRIDE_OPTIONS
;
3327 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3328 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3331 /* -fPIC is the default for x86_64. */
3332 if (TARGET_MACHO
&& TARGET_64BIT
)
3335 /* Need to check -mtune=generic first. */
3336 if (ix86_tune_string
)
3338 if (!strcmp (ix86_tune_string
, "generic")
3339 || !strcmp (ix86_tune_string
, "i686")
3340 /* As special support for cross compilers we read -mtune=native
3341 as -mtune=generic. With native compilers we won't see the
3342 -mtune=native, as it was changed by the driver. */
3343 || !strcmp (ix86_tune_string
, "native"))
3346 ix86_tune_string
= "generic64";
3348 ix86_tune_string
= "generic32";
3350 /* If this call is for setting the option attribute, allow the
3351 generic32/generic64 that was previously set. */
3352 else if (!main_args_p
3353 && (!strcmp (ix86_tune_string
, "generic32")
3354 || !strcmp (ix86_tune_string
, "generic64")))
3356 else if (!strncmp (ix86_tune_string
, "generic", 7))
3357 error ("bad value (%s) for %stune=%s %s",
3358 ix86_tune_string
, prefix
, suffix
, sw
);
3359 else if (!strcmp (ix86_tune_string
, "x86-64"))
3360 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3361 "%stune=k8%s or %stune=generic%s instead as appropriate",
3362 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3366 if (ix86_arch_string
)
3367 ix86_tune_string
= ix86_arch_string
;
3368 if (!ix86_tune_string
)
3370 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3371 ix86_tune_defaulted
= 1;
3374 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3375 need to use a sensible tune option. */
3376 if (!strcmp (ix86_tune_string
, "generic")
3377 || !strcmp (ix86_tune_string
, "x86-64")
3378 || !strcmp (ix86_tune_string
, "i686"))
3381 ix86_tune_string
= "generic64";
3383 ix86_tune_string
= "generic32";
3387 if (ix86_stringop_string
)
3389 if (!strcmp (ix86_stringop_string
, "rep_byte"))
3390 stringop_alg
= rep_prefix_1_byte
;
3391 else if (!strcmp (ix86_stringop_string
, "libcall"))
3392 stringop_alg
= libcall
;
3393 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
3394 stringop_alg
= rep_prefix_4_byte
;
3395 else if (!strcmp (ix86_stringop_string
, "rep_8byte")
3397 /* rep; movq isn't available in 32-bit code. */
3398 stringop_alg
= rep_prefix_8_byte
;
3399 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
3400 stringop_alg
= loop_1_byte
;
3401 else if (!strcmp (ix86_stringop_string
, "loop"))
3402 stringop_alg
= loop
;
3403 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
3404 stringop_alg
= unrolled_loop
;
3406 error ("bad value (%s) for %sstringop-strategy=%s %s",
3407 ix86_stringop_string
, prefix
, suffix
, sw
);
3410 if (!ix86_arch_string
)
3411 ix86_arch_string
= TARGET_64BIT
? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3413 ix86_arch_specified
= 1;
3415 /* Validate -mabi= value. */
3416 if (ix86_abi_string
)
3418 if (strcmp (ix86_abi_string
, "sysv") == 0)
3419 ix86_abi
= SYSV_ABI
;
3420 else if (strcmp (ix86_abi_string
, "ms") == 0)
3423 error ("unknown ABI (%s) for %sabi=%s %s",
3424 ix86_abi_string
, prefix
, suffix
, sw
);
3427 ix86_abi
= DEFAULT_ABI
;
3429 if (ix86_cmodel_string
!= 0)
3431 if (!strcmp (ix86_cmodel_string
, "small"))
3432 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3433 else if (!strcmp (ix86_cmodel_string
, "medium"))
3434 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
3435 else if (!strcmp (ix86_cmodel_string
, "large"))
3436 ix86_cmodel
= flag_pic
? CM_LARGE_PIC
: CM_LARGE
;
3438 error ("code model %s does not support PIC mode", ix86_cmodel_string
);
3439 else if (!strcmp (ix86_cmodel_string
, "32"))
3440 ix86_cmodel
= CM_32
;
3441 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
3442 ix86_cmodel
= CM_KERNEL
;
3444 error ("bad value (%s) for %scmodel=%s %s",
3445 ix86_cmodel_string
, prefix
, suffix
, sw
);
3449 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3450 use of rip-relative addressing. This eliminates fixups that
3451 would otherwise be needed if this object is to be placed in a
3452 DLL, and is essentially just as efficient as direct addressing. */
3453 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
3454 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
3455 else if (TARGET_64BIT
)
3456 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3458 ix86_cmodel
= CM_32
;
3460 if (ix86_asm_string
!= 0)
3463 && !strcmp (ix86_asm_string
, "intel"))
3464 ix86_asm_dialect
= ASM_INTEL
;
3465 else if (!strcmp (ix86_asm_string
, "att"))
3466 ix86_asm_dialect
= ASM_ATT
;
3468 error ("bad value (%s) for %sasm=%s %s",
3469 ix86_asm_string
, prefix
, suffix
, sw
);
3471 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
3472 error ("code model %qs not supported in the %s bit mode",
3473 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
3474 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3475 sorry ("%i-bit mode not compiled in",
3476 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3478 for (i
= 0; i
< pta_size
; i
++)
3479 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3481 ix86_schedule
= processor_alias_table
[i
].schedule
;
3482 ix86_arch
= processor_alias_table
[i
].processor
;
3483 /* Default cpu tuning to the architecture. */
3484 ix86_tune
= ix86_arch
;
3486 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3487 error ("CPU you selected does not support x86-64 "
3490 if (processor_alias_table
[i
].flags
& PTA_MMX
3491 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3492 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3493 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3494 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3495 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3496 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3497 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3498 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3499 if (processor_alias_table
[i
].flags
& PTA_SSE
3500 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3501 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3502 if (processor_alias_table
[i
].flags
& PTA_SSE2
3503 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3504 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3505 if (processor_alias_table
[i
].flags
& PTA_SSE3
3506 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3507 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3508 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3509 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3510 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3511 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3512 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3513 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3514 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3515 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3516 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3517 if (processor_alias_table
[i
].flags
& PTA_AVX
3518 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3519 ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3520 if (processor_alias_table
[i
].flags
& PTA_FMA
3521 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3522 ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3523 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3524 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3525 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3526 if (processor_alias_table
[i
].flags
& PTA_FMA4
3527 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3528 ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3529 if (processor_alias_table
[i
].flags
& PTA_XOP
3530 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3531 ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3532 if (processor_alias_table
[i
].flags
& PTA_LWP
3533 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3534 ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3535 if (processor_alias_table
[i
].flags
& PTA_ABM
3536 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3537 ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3538 if (processor_alias_table
[i
].flags
& PTA_BMI
3539 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3540 ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3541 if (processor_alias_table
[i
].flags
& PTA_TBM
3542 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3543 ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3544 if (processor_alias_table
[i
].flags
& PTA_CX16
3545 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3546 ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3547 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3548 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3549 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3550 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3551 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3552 ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3553 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3554 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3555 ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3556 if (processor_alias_table
[i
].flags
& PTA_AES
3557 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3558 ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3559 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3560 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3561 ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3562 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3563 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3564 ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3565 if (processor_alias_table
[i
].flags
& PTA_RDRND
3566 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3567 ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3568 if (processor_alias_table
[i
].flags
& PTA_F16C
3569 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3570 ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3571 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3572 x86_prefetch_sse
= true;
3577 if (!strcmp (ix86_arch_string
, "generic"))
3578 error ("generic CPU can be used only for %stune=%s %s",
3579 prefix
, suffix
, sw
);
3580 else if (!strncmp (ix86_arch_string
, "generic", 7) || i
== pta_size
)
3581 error ("bad value (%s) for %sarch=%s %s",
3582 ix86_arch_string
, prefix
, suffix
, sw
);
3584 ix86_arch_mask
= 1u << ix86_arch
;
3585 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3586 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3588 for (i
= 0; i
< pta_size
; i
++)
3589 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
3591 ix86_schedule
= processor_alias_table
[i
].schedule
;
3592 ix86_tune
= processor_alias_table
[i
].processor
;
3595 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3597 if (ix86_tune_defaulted
)
3599 ix86_tune_string
= "x86-64";
3600 for (i
= 0; i
< pta_size
; i
++)
3601 if (! strcmp (ix86_tune_string
,
3602 processor_alias_table
[i
].name
))
3604 ix86_schedule
= processor_alias_table
[i
].schedule
;
3605 ix86_tune
= processor_alias_table
[i
].processor
;
3608 error ("CPU you selected does not support x86-64 "
3614 /* Adjust tuning when compiling for 32-bit ABI. */
3617 case PROCESSOR_GENERIC64
:
3618 ix86_tune
= PROCESSOR_GENERIC32
;
3619 ix86_schedule
= CPU_PENTIUMPRO
;
3622 case PROCESSOR_CORE2_64
:
3623 ix86_tune
= PROCESSOR_CORE2_32
;
3626 case PROCESSOR_COREI7_64
:
3627 ix86_tune
= PROCESSOR_COREI7_32
;
3634 /* Intel CPUs have always interpreted SSE prefetch instructions as
3635 NOPs; so, we can enable SSE prefetch instructions even when
3636 -mtune (rather than -march) points us to a processor that has them.
3637 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3638 higher processors. */
3640 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3641 x86_prefetch_sse
= true;
3645 if (ix86_tune_specified
&& i
== pta_size
)
3646 error ("bad value (%s) for %stune=%s %s",
3647 ix86_tune_string
, prefix
, suffix
, sw
);
3649 ix86_tune_mask
= 1u << ix86_tune
;
3650 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
3651 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
3653 #ifndef USE_IX86_FRAME_POINTER
3654 #define USE_IX86_FRAME_POINTER 0
3657 #ifndef USE_X86_64_FRAME_POINTER
3658 #define USE_X86_64_FRAME_POINTER 0
3661 /* Set the default values for switches whose default depends on TARGET_64BIT
3662 in case they weren't overwritten by command line options. */
3665 if (optimize
> 1 && !global_options_set
.x_flag_zee
)
3667 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3668 flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3669 if (flag_asynchronous_unwind_tables
== 2)
3670 flag_unwind_tables
= flag_asynchronous_unwind_tables
= 1;
3671 if (flag_pcc_struct_return
== 2)
3672 flag_pcc_struct_return
= 0;
3676 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3677 flag_omit_frame_pointer
= !(USE_IX86_FRAME_POINTER
|| optimize_size
);
3678 if (flag_asynchronous_unwind_tables
== 2)
3679 flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3680 if (flag_pcc_struct_return
== 2)
3681 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3685 ix86_cost
= &ix86_size_cost
;
3687 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
3689 /* Arrange to set up i386_stack_locals for all functions. */
3690 init_machine_status
= ix86_init_machine_status
;
3692 /* Validate -mregparm= value. */
3693 if (ix86_regparm_string
)
3696 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix
, suffix
);
3697 i
= atoi (ix86_regparm_string
);
3698 if (i
< 0 || i
> REGPARM_MAX
)
3699 error ("%sregparm=%d%s is not between 0 and %d",
3700 prefix
, i
, suffix
, REGPARM_MAX
);
3705 ix86_regparm
= REGPARM_MAX
;
3707 /* If the user has provided any of the -malign-* options,
3708 warn and use that value only if -falign-* is not set.
3709 Remove this code in GCC 3.2 or later. */
3710 if (ix86_align_loops_string
)
3712 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3713 prefix
, suffix
, suffix
);
3714 if (align_loops
== 0)
3716 i
= atoi (ix86_align_loops_string
);
3717 if (i
< 0 || i
> MAX_CODE_ALIGN
)
3718 error ("%salign-loops=%d%s is not between 0 and %d",
3719 prefix
, i
, suffix
, MAX_CODE_ALIGN
);
3721 align_loops
= 1 << i
;
3725 if (ix86_align_jumps_string
)
3727 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3728 prefix
, suffix
, suffix
);
3729 if (align_jumps
== 0)
3731 i
= atoi (ix86_align_jumps_string
);
3732 if (i
< 0 || i
> MAX_CODE_ALIGN
)
3733 error ("%salign-loops=%d%s is not between 0 and %d",
3734 prefix
, i
, suffix
, MAX_CODE_ALIGN
);
3736 align_jumps
= 1 << i
;
3740 if (ix86_align_funcs_string
)
3742 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3743 prefix
, suffix
, suffix
);
3744 if (align_functions
== 0)
3746 i
= atoi (ix86_align_funcs_string
);
3747 if (i
< 0 || i
> MAX_CODE_ALIGN
)
3748 error ("%salign-loops=%d%s is not between 0 and %d",
3749 prefix
, i
, suffix
, MAX_CODE_ALIGN
);
3751 align_functions
= 1 << i
;
3755 /* Default align_* from the processor table. */
3756 if (align_loops
== 0)
3758 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3759 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3761 if (align_jumps
== 0)
3763 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3764 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3766 if (align_functions
== 0)
3768 align_functions
= processor_target_table
[ix86_tune
].align_func
;
3771 /* Validate -mbranch-cost= value, or provide default. */
3772 ix86_branch_cost
= ix86_cost
->branch_cost
;
3773 if (ix86_branch_cost_string
)
3775 i
= atoi (ix86_branch_cost_string
);
3777 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix
, i
, suffix
);
3779 ix86_branch_cost
= i
;
3781 if (ix86_section_threshold_string
)
3783 i
= atoi (ix86_section_threshold_string
);
3785 error ("%slarge-data-threshold=%d%s is negative", prefix
, i
, suffix
);
3787 ix86_section_threshold
= i
;
3790 if (ix86_tls_dialect_string
)
3792 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
3793 ix86_tls_dialect
= TLS_DIALECT_GNU
;
3794 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
3795 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
3797 error ("bad value (%s) for %stls-dialect=%s %s",
3798 ix86_tls_dialect_string
, prefix
, suffix
, sw
);
3801 if (ix87_precision_string
)
3803 i
= atoi (ix87_precision_string
);
3804 if (i
!= 32 && i
!= 64 && i
!= 80)
3805 error ("pc%d is not valid precision setting (32, 64 or 80)", i
);
3810 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
3812 /* Enable by default the SSE and MMX builtins. Do allow the user to
3813 explicitly disable any of these. In particular, disabling SSE and
3814 MMX for kernel code is extremely useful. */
3815 if (!ix86_arch_specified
)
3817 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3818 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
3821 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3825 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
3827 if (!ix86_arch_specified
)
3829 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
3831 /* i386 ABI does not specify red zone. It still makes sense to use it
3832 when programmer takes care to stack from being destroyed. */
3833 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
3834 target_flags
|= MASK_NO_RED_ZONE
;
3837 /* Keep nonleaf frame pointers. */
3838 if (flag_omit_frame_pointer
)
3839 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3840 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
3841 flag_omit_frame_pointer
= 1;
3843 /* If we're doing fast math, we don't care about comparison order
3844 wrt NaNs. This lets us use a shorter comparison sequence. */
3845 if (flag_finite_math_only
)
3846 target_flags
&= ~MASK_IEEE_FP
;
3848 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3849 since the insns won't need emulation. */
3850 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
3851 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3853 /* Likewise, if the target doesn't have a 387, or we've specified
3854 software floating point, don't use 387 inline intrinsics. */
3856 target_flags
|= MASK_NO_FANCY_MATH_387
;
3858 /* Turn on MMX builtins for -msse. */
3861 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
3862 x86_prefetch_sse
= true;
3865 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3866 if (TARGET_SSE4_2
|| TARGET_ABM
)
3867 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
& ~ix86_isa_flags_explicit
;
3869 /* Validate -mpreferred-stack-boundary= value or default it to
3870 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3871 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3872 if (ix86_preferred_stack_boundary_string
)
3874 int min
= (TARGET_64BIT
? 4 : 2);
3875 int max
= (TARGET_SEH
? 4 : 12);
3877 i
= atoi (ix86_preferred_stack_boundary_string
);
3878 if (i
< min
|| i
> max
)
3881 error ("%spreferred-stack-boundary%s is not supported "
3882 "for this target", prefix
, suffix
);
3884 error ("%spreferred-stack-boundary=%d%s is not between %d and %d",
3885 prefix
, i
, suffix
, min
, max
);
3888 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
3891 /* Set the default value for -mstackrealign. */
3892 if (ix86_force_align_arg_pointer
== -1)
3893 ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3895 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3897 /* Validate -mincoming-stack-boundary= value or default it to
3898 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3899 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3900 if (ix86_incoming_stack_boundary_string
)
3902 i
= atoi (ix86_incoming_stack_boundary_string
);
3903 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
3904 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3905 i
, TARGET_64BIT
? 4 : 2);
3908 ix86_user_incoming_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
3909 ix86_incoming_stack_boundary
3910 = ix86_user_incoming_stack_boundary
;
3914 /* Accept -msseregparm only if at least SSE support is enabled. */
3915 if (TARGET_SSEREGPARM
3917 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3919 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
3920 if (ix86_fpmath_string
!= 0)
3922 if (! strcmp (ix86_fpmath_string
, "387"))
3923 ix86_fpmath
= FPMATH_387
;
3924 else if (! strcmp (ix86_fpmath_string
, "sse"))
3928 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3929 ix86_fpmath
= FPMATH_387
;
3932 ix86_fpmath
= FPMATH_SSE
;
3934 else if (! strcmp (ix86_fpmath_string
, "387,sse")
3935 || ! strcmp (ix86_fpmath_string
, "387+sse")
3936 || ! strcmp (ix86_fpmath_string
, "sse,387")
3937 || ! strcmp (ix86_fpmath_string
, "sse+387")
3938 || ! strcmp (ix86_fpmath_string
, "both"))
3942 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3943 ix86_fpmath
= FPMATH_387
;
3945 else if (!TARGET_80387
)
3947 warning (0, "387 instruction set disabled, using SSE arithmetics");
3948 ix86_fpmath
= FPMATH_SSE
;
3951 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
3954 error ("bad value (%s) for %sfpmath=%s %s",
3955 ix86_fpmath_string
, prefix
, suffix
, sw
);
3958 /* If the i387 is disabled, then do not return values in it. */
3960 target_flags
&= ~MASK_FLOAT_RETURNS
;
3962 /* Use external vectorized library in vectorizing intrinsics. */
3963 if (ix86_veclibabi_string
)
3965 if (strcmp (ix86_veclibabi_string
, "svml") == 0)
3966 ix86_veclib_handler
= ix86_veclibabi_svml
;
3967 else if (strcmp (ix86_veclibabi_string
, "acml") == 0)
3968 ix86_veclib_handler
= ix86_veclibabi_acml
;
3970 error ("unknown vectorization library ABI type (%s) for "
3971 "%sveclibabi=%s %s", ix86_veclibabi_string
,
3972 prefix
, suffix
, sw
);
3975 if ((!USE_IX86_FRAME_POINTER
3976 || (x86_accumulate_outgoing_args
& ix86_tune_mask
))
3977 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3979 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3981 /* ??? Unwind info is not correct around the CFG unless either a frame
3982 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3983 unwind info generation to be aware of the CFG and propagating states
3985 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
3986 || flag_exceptions
|| flag_non_call_exceptions
)
3987 && flag_omit_frame_pointer
3988 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3990 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3991 warning (0, "unwind tables currently require either a frame pointer "
3992 "or %saccumulate-outgoing-args%s for correctness",
3994 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3997 /* If stack probes are required, the space used for large function
3998 arguments on the stack must also be probed, so enable
3999 -maccumulate-outgoing-args so this happens in the prologue. */
4000 if (TARGET_STACK_PROBE
4001 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
4003 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
4004 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4005 "for correctness", prefix
, suffix
);
4006 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
4009 /* For sane SSE instruction set generation we need fcomi instruction.
4010 It is safe to enable all CMOVE instructions. */
4014 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4017 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
4018 p
= strchr (internal_label_prefix
, 'X');
4019 internal_label_prefix_len
= p
- internal_label_prefix
;
4023 /* When scheduling description is not available, disable scheduler pass
4024 so it won't slow down the compilation and make x87 code slower. */
4025 if (!TARGET_SCHEDULE
)
4026 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
4028 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
4029 ix86_cost
->simultaneous_prefetches
,
4030 global_options
.x_param_values
,
4031 global_options_set
.x_param_values
);
4032 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
, ix86_cost
->prefetch_block
,
4033 global_options
.x_param_values
,
4034 global_options_set
.x_param_values
);
4035 maybe_set_param_value (PARAM_L1_CACHE_SIZE
, ix86_cost
->l1_cache_size
,
4036 global_options
.x_param_values
,
4037 global_options_set
.x_param_values
);
4038 maybe_set_param_value (PARAM_L2_CACHE_SIZE
, ix86_cost
->l2_cache_size
,
4039 global_options
.x_param_values
,
4040 global_options_set
.x_param_values
);
4042 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
4043 if (flag_prefetch_loop_arrays
< 0
4046 && software_prefetching_beneficial_p ())
4047 flag_prefetch_loop_arrays
= 1;
4049 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4050 can be optimized to ap = __builtin_next_arg (0). */
4051 if (!TARGET_64BIT
&& !flag_split_stack
)
4052 targetm
.expand_builtin_va_start
= NULL
;
4056 ix86_gen_leave
= gen_leave_rex64
;
4057 ix86_gen_add3
= gen_adddi3
;
4058 ix86_gen_sub3
= gen_subdi3
;
4059 ix86_gen_sub3_carry
= gen_subdi3_carry
;
4060 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
4061 ix86_gen_monitor
= gen_sse3_monitor64
;
4062 ix86_gen_andsp
= gen_anddi3
;
4063 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
4064 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
4065 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
4069 ix86_gen_leave
= gen_leave
;
4070 ix86_gen_add3
= gen_addsi3
;
4071 ix86_gen_sub3
= gen_subsi3
;
4072 ix86_gen_sub3_carry
= gen_subsi3_carry
;
4073 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
4074 ix86_gen_monitor
= gen_sse3_monitor
;
4075 ix86_gen_andsp
= gen_andsi3
;
4076 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
4077 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
4078 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
4082 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4084 target_flags
|= MASK_CLD
& ~target_flags_explicit
;
4087 if (!TARGET_64BIT
&& flag_pic
)
4089 if (flag_fentry
> 0)
4090 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4094 else if (TARGET_SEH
)
4096 if (flag_fentry
== 0)
4097 sorry ("-mno-fentry isn%'t compatible with SEH");
4100 else if (flag_fentry
< 0)
4102 #if defined(PROFILE_BEFORE_PROLOGUE)
4109 /* Save the initial options in case the user does function specific options */
4111 target_option_default_node
= target_option_current_node
4112 = build_target_option_node ();
4116 /* When not optimize for size, enable vzeroupper optimization for
4117 TARGET_AVX with -fexpensive-optimizations. */
4119 && flag_expensive_optimizations
4120 && !(target_flags_explicit
& MASK_VZEROUPPER
))
4121 target_flags
|= MASK_VZEROUPPER
;
4125 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4126 target_flags
&= ~MASK_VZEROUPPER
;
/* Return TRUE if VAL is passed in register with 256bit AVX modes.  */

/* NOTE(review): this extraction dropped lines (the embedded original line
   numbers jump, e.g. 4133 -> 4138); the return type, braces and `return'
   statements are missing.  Tokens below are unchanged; only comments and
   layout were added.  */
function_pass_avx256_p (const_rtx val)
  /* Direct case: VAL itself is a register in a 256-bit AVX mode.  */
  if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))

  /* A PARALLEL describes a value split across locations; scan each
     EXPR_LIST element for a 256-bit (or OImode) register piece.  */
  if (GET_CODE (val) == PARALLEL)
      for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
          r = XVECEXP (val, 0, i);
          if (GET_CODE (r) == EXPR_LIST
              && REG_P (XEXP (r, 0))
              && (GET_MODE (XEXP (r, 0)) == OImode
                  || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
4161 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4164 ix86_option_override (void)
4166 ix86_option_override_internal (true);
/* Update register usage after having seen the compiler flags.  */

/* NOTE(review): extraction dropped lines here (signature/return type,
   braces, and the bodies of the final two "squash 64-bit registers"
   loops).  Tokens below are unchanged; only comments/layout added.  */
ix86_conditional_register_usage (void)

  /* Values > 1 in fixed_regs/call_used_regs encode "fixed only in one
     of the 32/64-bit modes": 2 => 32-bit only, 3 => 64-bit only.
     Collapse them to 0/1 for the current mode.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (fixed_regs[i] > 1)
        fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
      if (call_used_regs[i] > 1)
        call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* The MS_ABI changes the set of call-used registers: rsi/rdi and
     xmm6-xmm15 are callee-saved under the Microsoft 64-bit ABI.  */
  if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
      call_used_regs[SI_REG] = 0;
      call_used_regs[DI_REG] = 0;
      call_used_regs[XMM6_REG] = 0;
      call_used_regs[XMM7_REG] = 0;
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
        call_used_regs[i] = 0;

  /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
     other call-clobbered regs for 64-bit.  Rebuilt from scratch so it
     exactly mirrors the call_used general registers.  */
      CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
        if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
            && call_used_regs[i])
          SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);

  /* If MMX is disabled, squash the registers (fixed, call-used, and
     nameless so they never appear in assembly output).  */
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
        fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
        fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
        fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If 32-bit, squash the 64-bit registers (r8-r15, xmm8-xmm15).
     NOTE(review): the loop bodies were dropped by the extraction.  */
    for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4242 /* Save the current options */
4245 ix86_function_specific_save (struct cl_target_option
*ptr
)
4247 ptr
->arch
= ix86_arch
;
4248 ptr
->schedule
= ix86_schedule
;
4249 ptr
->tune
= ix86_tune
;
4250 ptr
->fpmath
= ix86_fpmath
;
4251 ptr
->branch_cost
= ix86_branch_cost
;
4252 ptr
->tune_defaulted
= ix86_tune_defaulted
;
4253 ptr
->arch_specified
= ix86_arch_specified
;
4254 ptr
->ix86_isa_flags_explicit
= ix86_isa_flags_explicit
;
4255 ptr
->ix86_target_flags_explicit
= target_flags_explicit
;
4257 /* The fields are char but the variables are not; make sure the
4258 values fit in the fields. */
4259 gcc_assert (ptr
->arch
== ix86_arch
);
4260 gcc_assert (ptr
->schedule
== ix86_schedule
);
4261 gcc_assert (ptr
->tune
== ix86_tune
);
4262 gcc_assert (ptr
->fpmath
== ix86_fpmath
);
4263 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
4266 /* Restore the current options */
4269 ix86_function_specific_restore (struct cl_target_option
*ptr
)
4271 enum processor_type old_tune
= ix86_tune
;
4272 enum processor_type old_arch
= ix86_arch
;
4273 unsigned int ix86_arch_mask
, ix86_tune_mask
;
4276 ix86_arch
= (enum processor_type
) ptr
->arch
;
4277 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
4278 ix86_tune
= (enum processor_type
) ptr
->tune
;
4279 ix86_fpmath
= (enum fpmath_unit
) ptr
->fpmath
;
4280 ix86_branch_cost
= ptr
->branch_cost
;
4281 ix86_tune_defaulted
= ptr
->tune_defaulted
;
4282 ix86_arch_specified
= ptr
->arch_specified
;
4283 ix86_isa_flags_explicit
= ptr
->ix86_isa_flags_explicit
;
4284 target_flags_explicit
= ptr
->ix86_target_flags_explicit
;
4286 /* Recreate the arch feature tests if the arch changed */
4287 if (old_arch
!= ix86_arch
)
4289 ix86_arch_mask
= 1u << ix86_arch
;
4290 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
4291 ix86_arch_features
[i
]
4292 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4295 /* Recreate the tune optimization tests */
4296 if (old_tune
!= ix86_tune
)
4298 ix86_tune_mask
= 1u << ix86_tune
;
4299 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
4300 ix86_tune_features
[i
]
4301 = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
/* Print the current options (for -fverbose-asm / debug dumps).  */

/* NOTE(review): extraction dropped lines (return type, braces, local
   declarations, and several fprintf arguments such as the indent and
   fallback-name operands).  Tokens unchanged; comments only added.  */
ix86_function_specific_print (FILE *file, int indent,
                              struct cl_target_option *ptr)
    /* Render the saved ISA/target flag words as an option string.  */
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
                          NULL, NULL, NULL, false);

  /* arch: numeric value plus CPU name when it is in range.  */
  fprintf (file, "%*sarch = %d (%s)\n",
           ((ptr->arch < TARGET_CPU_DEFAULT_max)
            ? cpu_names[ptr->arch]

  /* tune: same formatting as arch.  */
  fprintf (file, "%*stune = %d (%s)\n",
           ((ptr->tune < TARGET_CPU_DEFAULT_max)
            ? cpu_names[ptr->tune]

  /* fpmath is a bit mask; may name both units.  */
  fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
           (ptr->fpmath & FPMATH_387) ? ", 387" : "",
           (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

    fprintf (file, "%*s%s\n", indent, "", target_string);
    /* ix86_target_string allocated the buffer; we own and release it.  */
    free (target_string);
/* Inner function to process the attribute((target(...))), take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over each element.  */

/* NOTE(review): extraction dropped lines (return type, braces, the attrs
   table declaration header, several table entries' tails, the `ret'
   bookkeeping, and most return statements).  Tokens unchanged below;
   only comments/layout added.  */
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])

/* Table-entry builders: S = attribute string, O = option enum,
   M = target_flags mask (0 for ISA options and string options).  */
#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }

    enum ix86_opt_type type;

    /* ISA options that can be toggled per function.  */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("fpmath=",	IX86_FUNCTION_SPECIFIC_FPMATH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* NOTE(review): the option/mask tails of the "cld", "ieee-fp" and
       "recip" entries were dropped by the extraction.  */
    IX86_ATTR_YES ("cld",

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",

  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings))
  else if (TREE_CODE (args) != STRING_CST)

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
      char *p = next_optstr;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      enum ix86_opt_type type = ix86_opt_unknown;

	  /* Clip this token at the comma and advance past it.  */
	  len = comma - next_optstr;
	  next_optstr = comma + 1;

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')

      /* Find the option.  Linear scan of the table; match on first
	 character, then length (exact for non-string options, prefix
	 for string options), then the full prefix bytes.  */
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;

      /* Process the option.  */
	  error ("attribute(target(\"%s\")) is unknown", orig_p);

      else if (type == ix86_opt_isa)
	ix86_handle_option (opt, p, opt_set_p);

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	  /* A no-xxx prefix on a yes/no option inverts its sense.  */
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	    target_flags |= mask;
	    target_flags &= ~mask;

      else if (type == ix86_opt_str)
	      error ("option(\"%s\") was already specified", opt_string);
	    /* Stash the string value; the caller owns and frees it.  */
	    p_strings[opt] = xstrdup (p + opt_len);
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

/* NOTE(review): extraction dropped lines (return type, braces, early
   `return NULL_TREE' and the final `return t').  Tokens unchanged;
   comments only added/corrected.  */
ix86_valid_target_attribute_tree (tree args)
  /* Snapshot the global option strings so they can be put back after
     the temporary override below.  */
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  const char *orig_fpmath_string = ix86_fpmath_string;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings))

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
      /* If we are using the default tune= or arch=, undo the string
	 assigned, and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
	ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
      else if (!TARGET_64BIT && TARGET_SSE)
	ix86_fpmath_string = "sse,387";

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      /* Restore the global strings snapshotted above.  */
      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      ix86_fpmath_string = orig_fpmath_string;

      /* Free up memory allocated to hold the strings.  */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	if (option_strings[i])
	  free (option_strings[i]);
/* Hook to validate attribute((target("string"))).  */

/* NOTE(review): extraction dropped lines (return type, braces, the
   `tree args' parameter line between `name' and `flags', and the
   return statements).  Tokens unchanged; comments only added.  */
ix86_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       int ARG_UNUSED (flags))
  struct cl_target_option cur_target;
  tree old_optimize = build_optimization_node ();
  tree new_target, new_optimize;
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting
     target options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so
     update the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options);
  new_target = ix86_valid_target_attribute_tree (args);
  new_optimize = build_optimization_node ();

      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  /* Put the global state back the way we found it.  */
  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));
/* Hook to determine if one function can safely inline another.  */

/* NOTE(review): extraction dropped lines (return type, braces, and every
   `ret = true/false' assignment plus the final return).  Tokens
   unchanged; comments only added.  */
ix86_can_inline_p (tree caller, tree callee)
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */

  /* If caller has no option attributes, but callee does, then it is
     not ok to inline.  */
  else if (!caller_tree)

      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* Callee's isa options should be a subset of the caller's, i.e. a
	 SSE4 function can inline a SSE2 function but a SSE2 function
	 can't inline a SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)

      else if (caller_opts->tune != callee_opts->tune)

      else if (caller_opts->fpmath != callee_opts->fpmath)

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
/* Remember the last target of ix86_set_current_function, so repeated
   calls for the same FNDECL can return quickly.  GTY(()) marks it as a
   garbage-collector root.  */
static GTY(()) tree ix86_previous_fndecl;
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */

/* NOTE(review): extraction dropped lines (return type, braces, the
   NULL_TREE arms of the two conditionals, the intervening `else if'
   tests, and presumably the target-reinit calls -- TODO confirm against
   the full source).  Tokens unchanged; comments only added.  */
ix86_set_current_function (tree fndecl)
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want
     to slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)

	  /* Switch to the new function's saved target options.  */
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));

	  /* Fall back to the current top-level defaults.  */
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
/* Return true if this goes in large data/bss.  */

/* NOTE(review): extraction dropped lines (return type, braces and all
   return statements).  Tokens unchanged; comments only added.  */
ix86_in_large_data_p (tree exp)
  /* Large sections only exist for the medium code models.  */
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)

  /* An explicit section placement in .ldata/.lbss counts as large.  */
  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)

      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

/* NOTE(review): extraction dropped lines (the forward declaration's
   trailing `;', the definition's return type, braces, `break'
   statements, several case labels and section-name assignments).
   Tokens unchanged; comments only added.  */
static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)

x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
  /* Medium model: large objects go into the .l* section family.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  flags |= SECTION_BSS;
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	case SECCAT_SRODATA:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */

	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	    return get_section (sname, flags, NULL);

	  return get_named_section (decl, sname, reloc);

  return default_elf_select_section (decl, reloc, align);
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

/* NOTE(review): extraction dropped lines (braces, `break' statements,
   some case labels and the early `return' paths).  Tokens unchanged;
   comments only added.  */
static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
  /* Medium model: large objects get a .l*-prefixed per-decl section.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  prefix = one_only ? ".lb" : ".lbss";
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	case SECCAT_SRODATA:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */

	  const char *name, *linkonce;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);

  default_unique_section (decl, reloc);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use the .largecomm opcode so the
   object lands in large bss.  */

/* NOTE(review): extraction dropped lines (return type, the `align'
   parameter line, braces and the `else' between the two fputs calls).
   Tokens unchanged; comments only added.  */
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
  /* Large objects under the medium code models use .largecomm;
     everything else uses the normal COMMON_ASM_OP directive.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

/* NOTE(review): extraction dropped lines (return type, the `align'
   parameter line, braces, the `else' between the two switch_to_section
   calls and the `#else' of the ASM_DECLARE_OBJECT_NAME conditional).
   Tokens unchanged; comments only added.  */
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
  /* Large objects under the medium code models go into .lbss;
     everything else goes into the normal bss section.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  /* Reserve the space; a zero-sized object still gets one byte.  */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
4978 static const struct default_options ix86_option_optimization_table
[] =
4980 /* Turn off -fschedule-insns by default. It tends to make the
4981 problem with not enough registers even worse. */
4982 #ifdef INSN_SCHEDULING
4983 { OPT_LEVELS_ALL
, OPT_fschedule_insns
, NULL
, 0 },
4986 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4987 SUBTARGET_OPTIMIZATION_OPTIONS
,
4989 { OPT_LEVELS_NONE
, 0, NULL
, 0 }
/* Implement TARGET_OPTION_INIT_STRUCT.  */

/* NOTE(review): extraction dropped lines (return type, braces, and
   presumably a Darwin-only guard around the errno assignment, given the
   comment -- TODO confirm against the full source).  Tokens unchanged;
   comments only added.  */
ix86_option_init_struct (struct gcc_options *opts)
    /* The Darwin libraries never set errno, so we might as well
       avoid calling them when that's the only reason we would.  */
    opts->x_flag_errno_math = 0;

  opts->x_flag_pcc_struct_return = 2;
  opts->x_flag_asynchronous_unwind_tables = 2;
  opts->x_flag_vect_cost_model = 1;
5007 /* Decide whether we must probe the stack before any space allocation
5008 on this target. It's essentially TARGET_STACK_PROBE except when
5009 -fstack-check causes the stack to be already probed differently. */
5012 ix86_target_stack_probe (void)
5014 /* Do not probe the stack twice if static stack checking is enabled. */
5015 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
5018 return TARGET_STACK_PROBE
;
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

/* NOTE(review): extraction dropped lines (return type, braces, the PIC
   condition head, the if/else structure around decl_or_type, and every
   `return true/false').  Tokens unchanged; comments only added.  */
ix86_function_ok_for_sibcall (tree decl, tree exp)
  tree type, decl_or_type;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  (Darwin does not have a PLT.)  */
      && (!decl || !targetm.binds_local_p (decl)))

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)

      /* Direct call: use the declaration and its type.  */
      decl_or_type = decl;
      type = TREE_TYPE (decl);

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
  if (STACK_REG_P (a) || STACK_REG_P (b))
      if (!rtx_equal_p (a, b))
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
      /* Disable sibcall if we need to generate vzeroupper after
	 returning from the callee.  */
      if (TARGET_VZEROUPPER
	  && cfun->machine->callee_return_avx256_p
	  && !cfun->machine->caller_return_avx256_p)
  else if (!rtx_equal_p (a, b))

      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)

  /* If this call is indirect, we'll need to be able to use a
     call-clobbered register for the address of the target function.
     Make sure that all such registers are not used for passing
     parameters.  Note that DLLIMPORT functions are indirect.  */
      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
      if (ix86_function_regparm (type, NULL) >= 3)
	  /* ??? Need to count the actual number of registers to be used,
	     not the possible number of registers.  Fix later.  */

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
5119 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5120 and "sseregparm" calling convention attributes;
5121 arguments as in struct attribute_spec.handler. */
5124 ix86_handle_cconv_attribute (tree
*node
, tree name
,
5126 int flags ATTRIBUTE_UNUSED
,
5129 if (TREE_CODE (*node
) != FUNCTION_TYPE
5130 && TREE_CODE (*node
) != METHOD_TYPE
5131 && TREE_CODE (*node
) != FIELD_DECL
5132 && TREE_CODE (*node
) != TYPE_DECL
)
5134 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5136 *no_add_attrs
= true;
5140 /* Can combine regparm with all attributes but fastcall. */
5141 if (is_attribute_p ("regparm", name
))
5145 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5147 error ("fastcall and regparm attributes are not compatible");
5150 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5152 error ("regparam and thiscall attributes are not compatible");
5155 cst
= TREE_VALUE (args
);
5156 if (TREE_CODE (cst
) != INTEGER_CST
)
5158 warning (OPT_Wattributes
,
5159 "%qE attribute requires an integer constant argument",
5161 *no_add_attrs
= true;
5163 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
5165 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
5167 *no_add_attrs
= true;
5175 /* Do not warn when emulating the MS ABI. */
5176 if ((TREE_CODE (*node
) != FUNCTION_TYPE
5177 && TREE_CODE (*node
) != METHOD_TYPE
)
5178 || ix86_function_type_abi (*node
) != MS_ABI
)
5179 warning (OPT_Wattributes
, "%qE attribute ignored",
5181 *no_add_attrs
= true;
5185 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5186 if (is_attribute_p ("fastcall", name
))
5188 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5190 error ("fastcall and cdecl attributes are not compatible");
5192 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5194 error ("fastcall and stdcall attributes are not compatible");
5196 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5198 error ("fastcall and regparm attributes are not compatible");
5200 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5202 error ("fastcall and thiscall attributes are not compatible");
5206 /* Can combine stdcall with fastcall (redundant), regparm and
5208 else if (is_attribute_p ("stdcall", name
))
5210 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5212 error ("stdcall and cdecl attributes are not compatible");
5214 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5216 error ("stdcall and fastcall attributes are not compatible");
5218 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5220 error ("stdcall and thiscall attributes are not compatible");
5224 /* Can combine cdecl with regparm and sseregparm. */
5225 else if (is_attribute_p ("cdecl", name
))
5227 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5229 error ("stdcall and cdecl attributes are not compatible");
5231 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5233 error ("fastcall and cdecl attributes are not compatible");
5235 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5237 error ("cdecl and thiscall attributes are not compatible");
5240 else if (is_attribute_p ("thiscall", name
))
5242 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5243 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5245 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5247 error ("stdcall and thiscall attributes are not compatible");
5249 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5251 error ("fastcall and thiscall attributes are not compatible");
5253 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5255 error ("cdecl and thiscall attributes are not compatible");
5259 /* Can combine sseregparm with all attributes. */
5264 /* Return 0 if the attributes for two types are incompatible, 1 if they
5265 are compatible, and 2 if they are nearly compatible (which causes a
5266 warning to be generated). */
5269 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5271 /* Check for mismatch of non-default calling convention. */
5272 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
5274 if (TREE_CODE (type1
) != FUNCTION_TYPE
5275 && TREE_CODE (type1
) != METHOD_TYPE
)
5278 /* Check for mismatched fastcall/regparm types. */
5279 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
5280 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
5281 || (ix86_function_regparm (type1
, NULL
)
5282 != ix86_function_regparm (type2
, NULL
)))
5285 /* Check for mismatched sseregparm types. */
5286 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
5287 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
5290 /* Check for mismatched thiscall types. */
5291 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1
))
5292 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2
)))
5295 /* Check for mismatched return types (cdecl vs stdcall). */
5296 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
5297 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
5303 /* Return the regparm value for a function with the indicated TYPE and DECL.
5304 DECL may be NULL when calling function indirectly
5305 or considering a libcall. */
5308 ix86_function_regparm (const_tree type
, const_tree decl
)
5314 return (ix86_function_type_abi (type
) == SYSV_ABI
5315 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5317 regparm
= ix86_regparm
;
5318 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5321 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5325 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
5328 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type
)))
5331 /* Use register calling convention for local functions when possible. */
5333 && TREE_CODE (decl
) == FUNCTION_DECL
5335 && !(profile_flag
&& !flag_fentry
))
5337 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5338 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5341 int local_regparm
, globals
= 0, regno
;
5343 /* Make sure no regparm register is taken by a
5344 fixed register variable. */
5345 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5346 if (fixed_regs
[local_regparm
])
5349 /* We don't want to use regparm(3) for nested functions as
5350 these use a static chain pointer in the third argument. */
5351 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5354 /* In 32-bit mode save a register for the split stack. */
5355 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5358 /* Each fixed register usage increases register pressure,
5359 so less registers should be used for argument passing.
5360 This functionality can be overriden by an explicit
5362 for (regno
= 0; regno
<= DI_REG
; regno
++)
5363 if (fixed_regs
[regno
])
5367 = globals
< local_regparm
? local_regparm
- globals
: 0;
5369 if (local_regparm
> regparm
)
5370 regparm
= local_regparm
;
5377 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5378 DFmode (2) arguments in SSE registers for a function with the
5379 indicated TYPE and DECL. DECL may be NULL when calling function
5380 indirectly or considering a libcall. Otherwise return 0. */
5383 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5385 gcc_assert (!TARGET_64BIT
);
5387 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5388 by the sseregparm attribute. */
5389 if (TARGET_SSEREGPARM
5390 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5397 error ("calling %qD with attribute sseregparm without "
5398 "SSE/SSE2 enabled", decl
);
5400 error ("calling %qT with attribute sseregparm without "
5401 "SSE/SSE2 enabled", type
);
5409 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5410 (and DFmode for SSE2) arguments in SSE registers. */
5411 if (decl
&& TARGET_SSE_MATH
&& optimize
5412 && !(profile_flag
&& !flag_fentry
))
5414 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5415 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5417 return TARGET_SSE2
? 2 : 1;
5423 /* Return true if EAX is live at the start of the function. Used by
5424 ix86_expand_prologue to determine if we need special help before
5425 calling allocate_stack_worker. */
5428 ix86_eax_live_at_start_p (void)
5430 /* Cheat. Don't bother working forward from ix86_function_regparm
5431 to the function type to whether an actual argument is located in
5432 eax. Instead just look at cfg info, which is still close enough
5433 to correct at this point. This gives false positives for broken
5434 functions that might use uninitialized data that happens to be
5435 allocated in eax, but who cares? */
5436 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
5440 ix86_keep_aggregate_return_pointer (tree fntype
)
5444 attr
= lookup_attribute ("callee_pop_aggregate_return",
5445 TYPE_ATTRIBUTES (fntype
));
5447 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5449 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5452 /* Value is the number of bytes of arguments automatically
5453 popped when returning from a subroutine call.
5454 FUNDECL is the declaration node of the function (as a tree),
5455 FUNTYPE is the data type of the function (as a tree),
5456 or for a library call it is an identifier node for the subroutine name.
5457 SIZE is the number of bytes of arguments passed on the stack.
5459 On the 80386, the RTD insn may be used to pop them if the number
5460 of args is fixed, but if the number is variable then the caller
5461 must pop them all. RTD can't be used for library calls now
5462 because the library is compiled with the Unix compiler.
5463 Use of RTD is a selectable option, since it is incompatible with
5464 standard Unix calling sequences. If the option is not selected,
5465 the caller must always pop the args.
5467 The attribute stdcall is equivalent to RTD on a per module basis. */
5470 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5474 /* None of the 64-bit ABIs pop arguments. */
5478 rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
5480 /* Cdecl functions override -mrtd, and never pop the stack. */
5481 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
)))
5483 /* Stdcall and fastcall functions will pop the stack if not
5485 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
5486 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
))
5487 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype
)))
5490 if (rtd
&& ! stdarg_p (funtype
))
5494 /* Lose any fake structure return argument if it is passed on the stack. */
5495 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5496 && !ix86_keep_aggregate_return_pointer (funtype
))
5498 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5500 return GET_MODE_SIZE (Pmode
);
5506 /* Argument support functions. */
5508 /* Return true when register may be used to pass function parameters. */
5510 ix86_function_arg_regno_p (int regno
)
5513 const int *parm_regs
;
5518 return (regno
< REGPARM_MAX
5519 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5521 return (regno
< REGPARM_MAX
5522 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5523 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5524 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5525 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5530 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
5535 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5536 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5540 /* TODO: The function should depend on current function ABI but
5541 builtins.c would need updating then. Therefore we use the
5544 /* RAX is used as hidden argument to va_arg functions. */
5545 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5548 if (ix86_abi
== MS_ABI
)
5549 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5551 parm_regs
= x86_64_int_parameter_registers
;
5552 for (i
= 0; i
< (ix86_abi
== MS_ABI
5553 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5554 if (regno
== parm_regs
[i
])
5559 /* Return if we do not know how to pass TYPE solely in registers. */
5562 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
5564 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5567 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5568 The layout_type routine is crafty and tries to trick us into passing
5569 currently unsupported vector types on the stack by using TImode. */
5570 return (!TARGET_64BIT
&& mode
== TImode
5571 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
5574 /* It returns the size, in bytes, of the area reserved for arguments passed
5575 in registers for the function represented by fndecl dependent to the used
5578 ix86_reg_parm_stack_space (const_tree fndecl
)
5580 enum calling_abi call_abi
= SYSV_ABI
;
5581 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5582 call_abi
= ix86_function_abi (fndecl
);
5584 call_abi
= ix86_function_type_abi (fndecl
);
5585 if (call_abi
== MS_ABI
)
5590 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5593 ix86_function_type_abi (const_tree fntype
)
5595 if (TARGET_64BIT
&& fntype
!= NULL
)
5597 enum calling_abi abi
= ix86_abi
;
5598 if (abi
== SYSV_ABI
)
5600 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
5603 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
5611 ix86_function_ms_hook_prologue (const_tree fn
)
5613 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
5615 if (decl_function_context (fn
) != NULL_TREE
)
5616 error_at (DECL_SOURCE_LOCATION (fn
),
5617 "ms_hook_prologue is not compatible with nested function");
5624 static enum calling_abi
5625 ix86_function_abi (const_tree fndecl
)
5629 return ix86_function_type_abi (TREE_TYPE (fndecl
));
5632 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5635 ix86_cfun_abi (void)
5637 if (! cfun
|| ! TARGET_64BIT
)
5639 return cfun
->machine
->call_abi
;
5642 /* Write the extra assembler code needed to declare a function properly. */
5645 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
5648 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
5652 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
5653 unsigned int filler_cc
= 0xcccccccc;
5655 for (i
= 0; i
< filler_count
; i
+= 4)
5656 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
5659 #ifdef SUBTARGET_ASM_UNWIND_INIT
5660 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
5663 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
5665 /* Output magic byte marker, if hot-patch attribute is set. */
5670 /* leaq [%rsp + 0], %rsp */
5671 asm_fprintf (asm_out_file
, ASM_BYTE
5672 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5676 /* movl.s %edi, %edi
5678 movl.s %esp, %ebp */
5679 asm_fprintf (asm_out_file
, ASM_BYTE
5680 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5686 extern void init_regs (void);
5688 /* Implementation of call abi switching target hook. Specific to FNDECL
5689 the specific call register sets are set. See also
5690 ix86_conditional_register_usage for more details. */
5692 ix86_call_abi_override (const_tree fndecl
)
5694 if (fndecl
== NULL_TREE
)
5695 cfun
->machine
->call_abi
= ix86_abi
;
5697 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
5700 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
5701 re-initialization of init_regs each time we switch function context since
5702 this is needed only during RTL expansion. */
5704 ix86_maybe_switch_abi (void)
5707 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
5711 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5712 for a call to a function whose data type is FNTYPE.
5713 For a library call, FNTYPE is 0. */
5716 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
5717 tree fntype
, /* tree ptr for function decl */
5718 rtx libname
, /* SYMBOL_REF of library name or 0 */
5722 struct cgraph_local_info
*i
;
5725 memset (cum
, 0, sizeof (*cum
));
5727 /* Initialize for the current callee. */
5730 cfun
->machine
->callee_pass_avx256_p
= false;
5731 cfun
->machine
->callee_return_avx256_p
= false;
5736 i
= cgraph_local_info (fndecl
);
5737 cum
->call_abi
= ix86_function_abi (fndecl
);
5738 fnret_type
= TREE_TYPE (TREE_TYPE (fndecl
));
5743 cum
->call_abi
= ix86_function_type_abi (fntype
);
5745 fnret_type
= TREE_TYPE (fntype
);
5750 if (TARGET_VZEROUPPER
&& fnret_type
)
5752 rtx fnret_value
= ix86_function_value (fnret_type
, fntype
,
5754 if (function_pass_avx256_p (fnret_value
))
5756 /* The return value of this function uses 256bit AVX modes. */
5758 cfun
->machine
->callee_return_avx256_p
= true;
5760 cfun
->machine
->caller_return_avx256_p
= true;
5764 cum
->caller
= caller
;
5766 /* Set up the number of registers to use for passing arguments. */
5768 if (cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
5769 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5770 "or subtarget optimization implying it");
5771 cum
->nregs
= ix86_regparm
;
5774 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
5775 ? X86_64_REGPARM_MAX
5776 : X86_64_MS_REGPARM_MAX
);
5780 cum
->sse_nregs
= SSE_REGPARM_MAX
;
5783 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
5784 ? X86_64_SSE_REGPARM_MAX
5785 : X86_64_MS_SSE_REGPARM_MAX
);
5789 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
5790 cum
->warn_avx
= true;
5791 cum
->warn_sse
= true;
5792 cum
->warn_mmx
= true;
5794 /* Because type might mismatch in between caller and callee, we need to
5795 use actual type of function for local calls.
5796 FIXME: cgraph_analyze can be told to actually record if function uses
5797 va_start so for local functions maybe_vaarg can be made aggressive
5799 FIXME: once typesytem is fixed, we won't need this code anymore. */
5801 fntype
= TREE_TYPE (fndecl
);
5802 cum
->maybe_vaarg
= (fntype
5803 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
5808 /* If there are variable arguments, then we won't pass anything
5809 in registers in 32-bit mode. */
5810 if (stdarg_p (fntype
))
5821 /* Use ecx and edx registers if function has fastcall attribute,
5822 else look for regparm information. */
5825 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)))
5828 cum
->fastcall
= 1; /* Same first register as in fastcall. */
5830 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
5836 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
5839 /* Set up the number of SSE registers used for passing SFmode
5840 and DFmode arguments. Warn for mismatching ABI. */
5841 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
5845 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5846 But in the case of vector types, it is some vector mode.
5848 When we have only some of our vector isa extensions enabled, then there
5849 are some modes for which vector_mode_supported_p is false. For these
5850 modes, the generic vector support in gcc will choose some non-vector mode
5851 in order to implement the type. By computing the natural mode, we'll
5852 select the proper ABI location for the operand and not depend on whatever
5853 the middle-end decides to do with these vector types.
5855 The midde-end can't deal with the vector types > 16 bytes. In this
5856 case, we return the original mode and warn ABI change if CUM isn't
5859 static enum machine_mode
5860 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
5862 enum machine_mode mode
= TYPE_MODE (type
);
5864 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
5866 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5867 if ((size
== 8 || size
== 16 || size
== 32)
5868 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5869 && TYPE_VECTOR_SUBPARTS (type
) > 1)
5871 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
5873 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
5874 mode
= MIN_MODE_VECTOR_FLOAT
;
5876 mode
= MIN_MODE_VECTOR_INT
;
5878 /* Get the mode which has this inner mode and number of units. */
5879 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
5880 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
5881 && GET_MODE_INNER (mode
) == innermode
)
5883 if (size
== 32 && !TARGET_AVX
)
5885 static bool warnedavx
;
5892 warning (0, "AVX vector argument without AVX "
5893 "enabled changes the ABI");
5895 return TYPE_MODE (type
);
5908 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5909 this may not agree with the mode that the type system has chosen for the
5910 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5911 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5914 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
5919 if (orig_mode
!= BLKmode
)
5920 tmp
= gen_rtx_REG (orig_mode
, regno
);
5923 tmp
= gen_rtx_REG (mode
, regno
);
5924 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
5925 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
5931 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5932 of this code is to classify each 8bytes of incoming argument by the register
5933 class and assign registers accordingly. */
5935 /* Return the union class of CLASS1 and CLASS2.
5936 See the x86-64 PS ABI for details. */
5938 static enum x86_64_reg_class
5939 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
5941 /* Rule #1: If both classes are equal, this is the resulting class. */
5942 if (class1
== class2
)
5945 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5947 if (class1
== X86_64_NO_CLASS
)
5949 if (class2
== X86_64_NO_CLASS
)
5952 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5953 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
5954 return X86_64_MEMORY_CLASS
;
5956 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5957 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
5958 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
5959 return X86_64_INTEGERSI_CLASS
;
5960 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
5961 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
5962 return X86_64_INTEGER_CLASS
;
5964 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5966 if (class1
== X86_64_X87_CLASS
5967 || class1
== X86_64_X87UP_CLASS
5968 || class1
== X86_64_COMPLEX_X87_CLASS
5969 || class2
== X86_64_X87_CLASS
5970 || class2
== X86_64_X87UP_CLASS
5971 || class2
== X86_64_COMPLEX_X87_CLASS
)
5972 return X86_64_MEMORY_CLASS
;
5974 /* Rule #6: Otherwise class SSE is used. */
5975 return X86_64_SSE_CLASS
;
5978 /* Classify the argument of type TYPE and mode MODE.
5979 CLASSES will be filled by the register class used to pass each word
5980 of the operand. The number of words is returned. In case the parameter
5981 should be passed in memory, 0 is returned. As a special case for zero
5982 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5984 BIT_OFFSET is used internally for handling records and specifies offset
5985 of the offset in bits modulo 256 to avoid overflow cases.
5987 See the x86-64 PS ABI for details.
5991 classify_argument (enum machine_mode mode
, const_tree type
,
5992 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
5994 HOST_WIDE_INT bytes
=
5995 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
5996 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5998 /* Variable sized entities are always passed/returned in memory. */
6002 if (mode
!= VOIDmode
6003 && targetm
.calls
.must_pass_in_stack (mode
, type
))
6006 /* Special case check for pointer to shared, on 64-bit target. */
6007 if (TARGET_64BIT
&& mode
== TImode
6008 && type
&& TREE_CODE (type
) == POINTER_TYPE
6009 && upc_shared_type_p (TREE_TYPE (type
)))
6011 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6015 if (type
&& AGGREGATE_TYPE_P (type
))
6019 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
6021 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6025 for (i
= 0; i
< words
; i
++)
6026 classes
[i
] = X86_64_NO_CLASS
;
6028 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6029 signalize memory class, so handle it as special case. */
6032 classes
[0] = X86_64_NO_CLASS
;
6036 /* Classify each field of record and merge classes. */
6037 switch (TREE_CODE (type
))
6040 /* And now merge the fields of structure. */
6041 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6043 if (TREE_CODE (field
) == FIELD_DECL
)
6047 if (TREE_TYPE (field
) == error_mark_node
)
6050 /* Bitfields are always classified as integer. Handle them
6051 early, since later code would consider them to be
6052 misaligned integers. */
6053 if (DECL_BIT_FIELD (field
))
6055 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
6056 i
< ((int_bit_position (field
) + (bit_offset
% 64))
6057 + tree_low_cst (DECL_SIZE (field
), 0)
6060 merge_classes (X86_64_INTEGER_CLASS
,
6067 type
= TREE_TYPE (field
);
6069 /* Flexible array member is ignored. */
6070 if (TYPE_MODE (type
) == BLKmode
6071 && TREE_CODE (type
) == ARRAY_TYPE
6072 && TYPE_SIZE (type
) == NULL_TREE
6073 && TYPE_DOMAIN (type
) != NULL_TREE
6074 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
6079 if (!warned
&& warn_psabi
)
6082 inform (input_location
,
6083 "the ABI of passing struct with"
6084 " a flexible array member has"
6085 " changed in GCC 4.4");
6089 num
= classify_argument (TYPE_MODE (type
), type
,
6091 (int_bit_position (field
)
6092 + bit_offset
) % 256);
6095 pos
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
6096 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
6098 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
6105 /* Arrays are handled as small records. */
6108 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
6109 TREE_TYPE (type
), subclasses
, bit_offset
);
6113 /* The partial classes are now full classes. */
6114 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
6115 subclasses
[0] = X86_64_SSE_CLASS
;
6116 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
6117 && !((bit_offset
% 64) == 0 && bytes
== 4))
6118 subclasses
[0] = X86_64_INTEGER_CLASS
;
6120 for (i
= 0; i
< words
; i
++)
6121 classes
[i
] = subclasses
[i
% num
];
6126 case QUAL_UNION_TYPE
:
6127 /* Unions are similar to RECORD_TYPE but offset is always 0.
6129 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6131 if (TREE_CODE (field
) == FIELD_DECL
)
6135 if (TREE_TYPE (field
) == error_mark_node
)
6138 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6139 TREE_TYPE (field
), subclasses
,
6143 for (i
= 0; i
< num
; i
++)
6144 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6155 /* When size > 16 bytes, if the first one isn't
6156 X86_64_SSE_CLASS or any other ones aren't
6157 X86_64_SSEUP_CLASS, everything should be passed in
6159 if (classes
[0] != X86_64_SSE_CLASS
)
6162 for (i
= 1; i
< words
; i
++)
6163 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6167 /* Final merger cleanup. */
6168 for (i
= 0; i
< words
; i
++)
6170 /* If one class is MEMORY, everything should be passed in
6172 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6175 /* The X86_64_SSEUP_CLASS should be always preceded by
6176 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6177 if (classes
[i
] == X86_64_SSEUP_CLASS
6178 && classes
[i
- 1] != X86_64_SSE_CLASS
6179 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6181 /* The first one should never be X86_64_SSEUP_CLASS. */
6182 gcc_assert (i
!= 0);
6183 classes
[i
] = X86_64_SSE_CLASS
;
6186 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6187 everything should be passed in memory. */
6188 if (classes
[i
] == X86_64_X87UP_CLASS
6189 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6193 /* The first one should never be X86_64_X87UP_CLASS. */
6194 gcc_assert (i
!= 0);
6195 if (!warned
&& warn_psabi
)
6198 inform (input_location
,
6199 "the ABI of passing union with long double"
6200 " has changed in GCC 4.4");
6208 /* Compute alignment needed. We align all types to natural boundaries with
6209 exception of XFmode that is aligned to 64bits. */
6210 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6212 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6215 mode_alignment
= 128;
6216 else if (mode
== XCmode
)
6217 mode_alignment
= 256;
6218 if (COMPLEX_MODE_P (mode
))
6219 mode_alignment
/= 2;
6220 /* Misaligned fields are always returned in memory. */
6221 if (bit_offset
% mode_alignment
)
6225 /* for V1xx modes, just use the base mode */
6226 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6227 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6228 mode
= GET_MODE_INNER (mode
);
6230 /* Classification of atomic types. */
6235 classes
[0] = X86_64_SSE_CLASS
;
6238 classes
[0] = X86_64_SSE_CLASS
;
6239 classes
[1] = X86_64_SSEUP_CLASS
;
6249 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6253 classes
[0] = X86_64_INTEGERSI_CLASS
;
6256 else if (size
<= 64)
6258 classes
[0] = X86_64_INTEGER_CLASS
;
6261 else if (size
<= 64+32)
6263 classes
[0] = X86_64_INTEGER_CLASS
;
6264 classes
[1] = X86_64_INTEGERSI_CLASS
;
6267 else if (size
<= 64+64)
6269 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6277 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6281 /* OImode shouldn't be used directly. */
6286 if (!(bit_offset
% 64))
6287 classes
[0] = X86_64_SSESF_CLASS
;
6289 classes
[0] = X86_64_SSE_CLASS
;
6292 classes
[0] = X86_64_SSEDF_CLASS
;
6295 classes
[0] = X86_64_X87_CLASS
;
6296 classes
[1] = X86_64_X87UP_CLASS
;
6299 classes
[0] = X86_64_SSE_CLASS
;
6300 classes
[1] = X86_64_SSEUP_CLASS
;
6303 classes
[0] = X86_64_SSE_CLASS
;
6304 if (!(bit_offset
% 64))
6310 if (!warned
&& warn_psabi
)
6313 inform (input_location
,
6314 "the ABI of passing structure with complex float"
6315 " member has changed in GCC 4.4");
6317 classes
[1] = X86_64_SSESF_CLASS
;
6321 classes
[0] = X86_64_SSEDF_CLASS
;
6322 classes
[1] = X86_64_SSEDF_CLASS
;
6325 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6328 /* This modes is larger than 16 bytes. */
6336 classes
[0] = X86_64_SSE_CLASS
;
6337 classes
[1] = X86_64_SSEUP_CLASS
;
6338 classes
[2] = X86_64_SSEUP_CLASS
;
6339 classes
[3] = X86_64_SSEUP_CLASS
;
6347 classes
[0] = X86_64_SSE_CLASS
;
6348 classes
[1] = X86_64_SSEUP_CLASS
;
6356 classes
[0] = X86_64_SSE_CLASS
;
6362 gcc_assert (VECTOR_MODE_P (mode
));
6367 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6369 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6370 classes
[0] = X86_64_INTEGERSI_CLASS
;
6372 classes
[0] = X86_64_INTEGER_CLASS
;
6373 classes
[1] = X86_64_INTEGER_CLASS
;
6374 return 1 + (bytes
> 8);
6378 /* Examine the argument and return set number of register required in each
6379 class. Return 0 iff parameter should be passed in memory. */
6381 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6382 int *int_nregs
, int *sse_nregs
)
6384 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6385 int n
= classify_argument (mode
, type
, regclass
, 0);
6391 for (n
--; n
>= 0; n
--)
6392 switch (regclass
[n
])
6394 case X86_64_INTEGER_CLASS
:
6395 case X86_64_INTEGERSI_CLASS
:
6398 case X86_64_SSE_CLASS
:
6399 case X86_64_SSESF_CLASS
:
6400 case X86_64_SSEDF_CLASS
:
6403 case X86_64_NO_CLASS
:
6404 case X86_64_SSEUP_CLASS
:
6406 case X86_64_X87_CLASS
:
6407 case X86_64_X87UP_CLASS
:
6411 case X86_64_COMPLEX_X87_CLASS
:
6412 return in_return
? 2 : 0;
6413 case X86_64_MEMORY_CLASS
:
6419 /* Construct container for the argument used by GCC interface. See
6420 FUNCTION_ARG for the detailed description. */
6423 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6424 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6425 const int *intreg
, int sse_regno
)
6427 /* The following variables hold the static issued_error state. */
6428 static bool issued_sse_arg_error
;
6429 static bool issued_sse_ret_error
;
6430 static bool issued_x87_ret_error
;
6432 enum machine_mode tmpmode
;
6434 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6435 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6439 int needed_sseregs
, needed_intregs
;
6440 rtx exp
[MAX_CLASSES
];
6443 n
= classify_argument (mode
, type
, regclass
, 0);
6446 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6449 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6452 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6453 some less clueful developer tries to use floating-point anyway. */
6454 if (needed_sseregs
&& !TARGET_SSE
)
6458 if (!issued_sse_ret_error
)
6460 error ("SSE register return with SSE disabled");
6461 issued_sse_ret_error
= true;
6464 else if (!issued_sse_arg_error
)
6466 error ("SSE register argument with SSE disabled");
6467 issued_sse_arg_error
= true;
6472 /* Likewise, error if the ABI requires us to return values in the
6473 x87 registers and the user specified -mno-80387. */
6474 if (!TARGET_80387
&& in_return
)
6475 for (i
= 0; i
< n
; i
++)
6476 if (regclass
[i
] == X86_64_X87_CLASS
6477 || regclass
[i
] == X86_64_X87UP_CLASS
6478 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6480 if (!issued_x87_ret_error
)
6482 error ("x87 register return with x87 disabled");
6483 issued_x87_ret_error
= true;
6488 /* First construct simple cases. Avoid SCmode, since we want to use
6489 single register to pass this type. */
6490 if (n
== 1 && mode
!= SCmode
)
6491 switch (regclass
[0])
6493 case X86_64_INTEGER_CLASS
:
6494 case X86_64_INTEGERSI_CLASS
:
6495 return gen_rtx_REG (mode
, intreg
[0]);
6496 case X86_64_SSE_CLASS
:
6497 case X86_64_SSESF_CLASS
:
6498 case X86_64_SSEDF_CLASS
:
6499 if (mode
!= BLKmode
)
6500 return gen_reg_or_parallel (mode
, orig_mode
,
6501 SSE_REGNO (sse_regno
));
6503 case X86_64_X87_CLASS
:
6504 case X86_64_COMPLEX_X87_CLASS
:
6505 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6506 case X86_64_NO_CLASS
:
6507 /* Zero sized array, struct or class. */
6512 if (n
== 2 && regclass
[0] == X86_64_SSE_CLASS
6513 && regclass
[1] == X86_64_SSEUP_CLASS
&& mode
!= BLKmode
)
6514 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
6516 && regclass
[0] == X86_64_SSE_CLASS
6517 && regclass
[1] == X86_64_SSEUP_CLASS
6518 && regclass
[2] == X86_64_SSEUP_CLASS
6519 && regclass
[3] == X86_64_SSEUP_CLASS
6521 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
6524 && regclass
[0] == X86_64_X87_CLASS
&& regclass
[1] == X86_64_X87UP_CLASS
)
6525 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6526 if (n
== 2 && regclass
[0] == X86_64_INTEGER_CLASS
6527 && regclass
[1] == X86_64_INTEGER_CLASS
6528 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6529 && intreg
[0] + 1 == intreg
[1])
6530 return gen_rtx_REG (mode
, intreg
[0]);
6532 /* Otherwise figure out the entries of the PARALLEL. */
6533 for (i
= 0; i
< n
; i
++)
6537 switch (regclass
[i
])
6539 case X86_64_NO_CLASS
:
6541 case X86_64_INTEGER_CLASS
:
6542 case X86_64_INTEGERSI_CLASS
:
6543 /* Merge TImodes on aligned occasions here too. */
6544 if (i
* 8 + 8 > bytes
)
6545 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6546 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6550 /* We've requested 24 bytes we don't have mode for. Use DImode. */
6551 if (tmpmode
== BLKmode
)
6553 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
6554 gen_rtx_REG (tmpmode
, *intreg
),
6558 case X86_64_SSESF_CLASS
:
6559 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
6560 gen_rtx_REG (SFmode
,
6561 SSE_REGNO (sse_regno
)),
6565 case X86_64_SSEDF_CLASS
:
6566 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
6567 gen_rtx_REG (DFmode
,
6568 SSE_REGNO (sse_regno
)),
6572 case X86_64_SSE_CLASS
:
6580 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6590 && regclass
[1] == X86_64_SSEUP_CLASS
6591 && regclass
[2] == X86_64_SSEUP_CLASS
6592 && regclass
[3] == X86_64_SSEUP_CLASS
);
6599 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
6600 gen_rtx_REG (tmpmode
,
6601 SSE_REGNO (sse_regno
)),
6610 /* Empty aligned struct, union or class. */
6614 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6615 for (i
= 0; i
< nexps
; i
++)
6616 XVECEXP (ret
, 0, i
) = exp
[i
];
6620 /* Update the data in CUM to advance over an argument of mode MODE
6621 and data type TYPE. (TYPE is null for libcalls where that information
6622 may not be available.) */
6625 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6626 const_tree type
, HOST_WIDE_INT bytes
,
6627 HOST_WIDE_INT words
)
6643 cum
->words
+= words
;
6644 cum
->nregs
-= words
;
6645 cum
->regno
+= words
;
6647 if (cum
->nregs
<= 0)
6655 /* OImode shouldn't be used directly. */
6659 if (cum
->float_in_sse
< 2)
6662 if (cum
->float_in_sse
< 1)
6679 if (!type
|| !AGGREGATE_TYPE_P (type
))
6681 cum
->sse_words
+= words
;
6682 cum
->sse_nregs
-= 1;
6683 cum
->sse_regno
+= 1;
6684 if (cum
->sse_nregs
<= 0)
6698 if (!type
|| !AGGREGATE_TYPE_P (type
))
6700 cum
->mmx_words
+= words
;
6701 cum
->mmx_nregs
-= 1;
6702 cum
->mmx_regno
+= 1;
6703 if (cum
->mmx_nregs
<= 0)
6714 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6715 const_tree type
, HOST_WIDE_INT words
, bool named
)
6717 int int_nregs
, sse_nregs
;
6719 /* Unnamed 256bit vector mode parameters are passed on stack. */
6720 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6723 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6724 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6726 cum
->nregs
-= int_nregs
;
6727 cum
->sse_nregs
-= sse_nregs
;
6728 cum
->regno
+= int_nregs
;
6729 cum
->sse_regno
+= sse_nregs
;
6733 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6734 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6735 cum
->words
+= words
;
6740 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
6741 HOST_WIDE_INT words
)
6743 /* Otherwise, this should be passed indirect. */
6744 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
6746 cum
->words
+= words
;
6754 /* Update the data in CUM to advance over an argument of mode MODE and
6755 data type TYPE. (TYPE is null for libcalls where that information
6756 may not be available.) */
6759 ix86_function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6760 const_tree type
, bool named
)
6762 HOST_WIDE_INT bytes
, words
;
6764 if (mode
== BLKmode
)
6765 bytes
= int_size_in_bytes (type
);
6767 bytes
= GET_MODE_SIZE (mode
);
6768 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6771 mode
= type_natural_mode (type
, NULL
);
6773 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6774 function_arg_advance_ms_64 (cum
, bytes
, words
);
6775 else if (TARGET_64BIT
)
6776 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
6778 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
6781 /* Define where to put the arguments to a function.
6782 Value is zero to push the argument on the stack,
6783 or a hard register in which to store the argument.
6785 MODE is the argument's machine mode.
6786 TYPE is the data type of the argument (as a tree).
6787 This is null for libcalls where that information may
6789 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6790 the preceding args and about the function being called.
6791 NAMED is nonzero if this argument is a named parameter
6792 (otherwise it is an extra parameter matching an ellipsis). */
6795 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6796 enum machine_mode orig_mode
, const_tree type
,
6797 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
6799 static bool warnedsse
, warnedmmx
;
6801 /* Avoid the AL settings for the Unix64 ABI. */
6802 if (mode
== VOIDmode
)
6818 if (words
<= cum
->nregs
)
6820 int regno
= cum
->regno
;
6822 /* Fastcall allocates the first two DWORD (SImode) or
6823 smaller arguments to ECX and EDX if it isn't an
6829 || (type
&& AGGREGATE_TYPE_P (type
)))
6832 /* ECX not EAX is the first allocated register. */
6833 if (regno
== AX_REG
)
6836 return gen_rtx_REG (mode
, regno
);
6841 if (cum
->float_in_sse
< 2)
6844 if (cum
->float_in_sse
< 1)
6848 /* In 32bit, we pass TImode in xmm registers. */
6855 if (!type
|| !AGGREGATE_TYPE_P (type
))
6857 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
6860 warning (0, "SSE vector argument without SSE enabled "
6864 return gen_reg_or_parallel (mode
, orig_mode
,
6865 cum
->sse_regno
+ FIRST_SSE_REG
);
6870 /* OImode shouldn't be used directly. */
6879 if (!type
|| !AGGREGATE_TYPE_P (type
))
6882 return gen_reg_or_parallel (mode
, orig_mode
,
6883 cum
->sse_regno
+ FIRST_SSE_REG
);
6893 if (!type
|| !AGGREGATE_TYPE_P (type
))
6895 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
6898 warning (0, "MMX vector argument without MMX enabled "
6902 return gen_reg_or_parallel (mode
, orig_mode
,
6903 cum
->mmx_regno
+ FIRST_MMX_REG
);
6912 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6913 enum machine_mode orig_mode
, const_tree type
, bool named
)
6915 /* Handle a hidden AL argument containing number of registers
6916 for varargs x86-64 functions. */
6917 if (mode
== VOIDmode
)
6918 return GEN_INT (cum
->maybe_vaarg
6919 ? (cum
->sse_nregs
< 0
6920 ? X86_64_SSE_REGPARM_MAX
6935 /* Unnamed 256bit vector mode parameters are passed on stack. */
6941 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
6943 &x86_64_int_parameter_registers
[cum
->regno
],
6948 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6949 enum machine_mode orig_mode
, bool named
,
6950 HOST_WIDE_INT bytes
)
6954 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6955 We use value of -2 to specify that current function call is MSABI. */
6956 if (mode
== VOIDmode
)
6957 return GEN_INT (-2);
6959 /* If we've run out of registers, it goes on the stack. */
6960 if (cum
->nregs
== 0)
6963 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
6965 /* Only floating point modes are passed in anything but integer regs. */
6966 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
6969 regno
= cum
->regno
+ FIRST_SSE_REG
;
6974 /* Unnamed floating parameters are passed in both the
6975 SSE and integer registers. */
6976 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
6977 t2
= gen_rtx_REG (mode
, regno
);
6978 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
6979 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
6980 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
6983 /* Handle aggregated types passed in register. */
6984 if (orig_mode
== BLKmode
)
6986 if (bytes
> 0 && bytes
<= 8)
6987 mode
= (bytes
> 4 ? DImode
: SImode
);
6988 if (mode
== BLKmode
)
6992 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
6995 /* Return where to put the arguments to a function.
6996 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6998 MODE is the argument's machine mode. TYPE is the data type of the
6999 argument. It is null for libcalls where that information may not be
7000 available. CUM gives information about the preceding args and about
7001 the function being called. NAMED is nonzero if this argument is a
7002 named parameter (otherwise it is an extra parameter matching an
7006 ix86_function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode omode
,
7007 const_tree type
, bool named
)
7009 enum machine_mode mode
= omode
;
7010 HOST_WIDE_INT bytes
, words
;
7013 if (mode
== BLKmode
)
7014 bytes
= int_size_in_bytes (type
);
7016 bytes
= GET_MODE_SIZE (mode
);
7017 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7019 /* To simplify the code below, represent vector types with a vector mode
7020 even if MMX/SSE are not active. */
7021 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
7022 mode
= type_natural_mode (type
, cum
);
7024 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7025 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
7026 else if (TARGET_64BIT
)
7027 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
7029 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
7031 if (TARGET_VZEROUPPER
&& function_pass_avx256_p (arg
))
7033 /* This argument uses 256bit AVX modes. */
7035 cfun
->machine
->callee_pass_avx256_p
= true;
7037 cfun
->machine
->caller_pass_avx256_p
= true;
7043 /* A C expression that indicates when an argument must be passed by
7044 reference. If nonzero for an argument, a copy of that argument is
7045 made in memory and a pointer to the argument is passed instead of
7046 the argument itself. The pointer is passed in whatever way is
7047 appropriate for passing a pointer to that type. */
7050 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
7051 enum machine_mode mode ATTRIBUTE_UNUSED
,
7052 const_tree type
, bool named ATTRIBUTE_UNUSED
)
7054 /* See Windows x64 Software Convention. */
7055 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7057 int msize
= (int) GET_MODE_SIZE (mode
);
7060 /* Arrays are passed by reference. */
7061 if (TREE_CODE (type
) == ARRAY_TYPE
)
7064 if (AGGREGATE_TYPE_P (type
))
7066 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7067 are passed by reference. */
7068 msize
= int_size_in_bytes (type
);
7072 /* __m128 is passed by reference. */
7074 case 1: case 2: case 4: case 8:
7080 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
7086 /* Return true when TYPE should be 128bit aligned for 32bit argument
7087 passing ABI. XXX: This function is obsolete and is only used for
7088 checking psABI compatibility with previous versions of GCC. */
7091 ix86_compat_aligned_value_p (const_tree type
)
7093 enum machine_mode mode
= TYPE_MODE (type
);
7094 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
7098 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
7100 if (TYPE_ALIGN (type
) < 128)
7103 if (AGGREGATE_TYPE_P (type
))
7105 /* Walk the aggregates recursively. */
7106 switch (TREE_CODE (type
))
7110 case QUAL_UNION_TYPE
:
7114 /* Walk all the structure fields. */
7115 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7117 if (TREE_CODE (field
) == FIELD_DECL
7118 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7125 /* Just for use if some languages passes arrays by value. */
7126 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7137 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7138 XXX: This function is obsolete and is only used for checking psABI
7139 compatibility with previous versions of GCC. */
7142 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7143 const_tree type
, unsigned int align
)
7145 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7146 natural boundaries. */
7147 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7149 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7150 make an exception for SSE modes since these require 128bit
7153 The handling here differs from field_alignment. ICC aligns MMX
7154 arguments to 4 byte boundaries, while structure fields are aligned
7155 to 8 byte boundaries. */
7158 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7159 align
= PARM_BOUNDARY
;
7163 if (!ix86_compat_aligned_value_p (type
))
7164 align
= PARM_BOUNDARY
;
7167 if (align
> BIGGEST_ALIGNMENT
)
7168 align
= BIGGEST_ALIGNMENT
;
7172 /* Return true when TYPE should be 128bit aligned for 32bit argument
7176 ix86_contains_aligned_value_p (const_tree type
)
7178 enum machine_mode mode
= TYPE_MODE (type
);
7180 if (mode
== XFmode
|| mode
== XCmode
)
7183 if (TYPE_ALIGN (type
) < 128)
7186 if (AGGREGATE_TYPE_P (type
))
7188 /* Walk the aggregates recursively. */
7189 switch (TREE_CODE (type
))
7193 case QUAL_UNION_TYPE
:
7197 /* Walk all the structure fields. */
7198 for (field
= TYPE_FIELDS (type
);
7200 field
= DECL_CHAIN (field
))
7202 if (TREE_CODE (field
) == FIELD_DECL
7203 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7210 /* Just for use if some languages passes arrays by value. */
7211 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7220 return TYPE_ALIGN (type
) >= 128;
7225 /* Gives the alignment boundary, in bits, of an argument with the
7226 specified mode and type. */
7229 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7234 /* Since the main variant type is used for call, we convert it to
7235 the main variant type. */
7236 type
= TYPE_MAIN_VARIANT (type
);
7237 align
= TYPE_ALIGN (type
);
7240 align
= GET_MODE_ALIGNMENT (mode
);
7241 if (align
< PARM_BOUNDARY
)
7242 align
= PARM_BOUNDARY
;
7246 unsigned int saved_align
= align
;
7250 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7253 if (mode
== XFmode
|| mode
== XCmode
)
7254 align
= PARM_BOUNDARY
;
7256 else if (!ix86_contains_aligned_value_p (type
))
7257 align
= PARM_BOUNDARY
;
7260 align
= PARM_BOUNDARY
;
7265 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7269 inform (input_location
,
7270 "The ABI for passing parameters with %d-byte"
7271 " alignment has changed in GCC 4.6",
7272 align
/ BITS_PER_UNIT
);
7279 /* Return true if N is a possible register number of function value. */
7282 ix86_function_value_regno_p (const unsigned int regno
)
7289 case FIRST_FLOAT_REG
:
7290 /* TODO: The function should depend on current function ABI but
7291 builtins.c would need updating then. Therefore we use the
7293 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7295 return TARGET_FLOAT_RETURNS_IN_80387
;
7301 if (TARGET_MACHO
|| TARGET_64BIT
)
7309 /* Define how to find the value returned by a function.
7310 VALTYPE is the data type of the value (as a tree).
7311 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7312 otherwise, FUNC is 0. */
7315 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7316 const_tree fntype
, const_tree fn
)
7320 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7321 we normally prevent this case when mmx is not available. However
7322 some ABIs may require the result to be returned like DImode. */
7323 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7324 regno
= TARGET_MMX
? FIRST_MMX_REG
: 0;
7326 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7327 we prevent this case when sse is not available. However some ABIs
7328 may require the result to be returned like integer TImode. */
7329 else if (mode
== TImode
7330 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7331 regno
= TARGET_SSE
? FIRST_SSE_REG
: 0;
7333 /* 32-byte vector modes in %ymm0. */
7334 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7335 regno
= TARGET_AVX
? FIRST_SSE_REG
: 0;
7337 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7338 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7339 regno
= FIRST_FLOAT_REG
;
7341 /* Most things go in %eax. */
7344 /* Override FP return register with %xmm0 for local functions when
7345 SSE math is enabled or for functions with sseregparm attribute. */
7346 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7348 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7349 if ((sse_level
>= 1 && mode
== SFmode
)
7350 || (sse_level
== 2 && mode
== DFmode
))
7351 regno
= FIRST_SSE_REG
;
7354 /* OImode shouldn't be used directly. */
7355 gcc_assert (mode
!= OImode
);
7357 return gen_rtx_REG (orig_mode
, regno
);
7361 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7366 /* Handle libcalls, which don't provide a type node. */
7367 if (valtype
== NULL
)
7379 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
7382 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
7386 return gen_rtx_REG (mode
, AX_REG
);
7390 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7391 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7392 x86_64_int_return_registers
, 0);
7394 /* For zero sized structures, construct_container returns NULL, but we
7395 need to keep rest of compiler happy by returning meaningful value. */
7397 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7403 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
7405 unsigned int regno
= AX_REG
;
7409 switch (GET_MODE_SIZE (mode
))
7412 if((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7413 && !COMPLEX_MODE_P (mode
))
7414 regno
= FIRST_SSE_REG
;
7418 if (mode
== SFmode
|| mode
== DFmode
)
7419 regno
= FIRST_SSE_REG
;
7425 return gen_rtx_REG (orig_mode
, regno
);
7429 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7430 enum machine_mode orig_mode
, enum machine_mode mode
)
7432 const_tree fn
, fntype
;
7435 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7436 fn
= fntype_or_decl
;
7437 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7439 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7440 return function_value_ms_64 (orig_mode
, mode
);
7441 else if (TARGET_64BIT
)
7442 return function_value_64 (orig_mode
, mode
, valtype
);
7444 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7448 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7449 bool outgoing ATTRIBUTE_UNUSED
)
7451 enum machine_mode mode
, orig_mode
;
7453 orig_mode
= TYPE_MODE (valtype
);
7454 mode
= type_natural_mode (valtype
, NULL
);
7455 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
7459 ix86_libcall_value (enum machine_mode mode
)
7461 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
7464 /* Return true iff type is returned in memory. */
7466 static bool ATTRIBUTE_UNUSED
7467 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7471 if (mode
== BLKmode
)
7474 size
= int_size_in_bytes (type
);
7476 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7479 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7481 /* User-created vectors small enough to fit in EAX. */
7485 /* MMX/3dNow values are returned in MM0,
7486 except when it doesn't exits or the ABI prescribes otherwise. */
7488 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7490 /* SSE values are returned in XMM0, except when it doesn't exist. */
7494 /* AVX values are returned in YMM0, except when it doesn't exist. */
7505 /* OImode shouldn't be used directly. */
7506 gcc_assert (mode
!= OImode
);
7511 static bool ATTRIBUTE_UNUSED
7512 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7514 int needed_intregs
, needed_sseregs
;
7515 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7518 static bool ATTRIBUTE_UNUSED
7519 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7521 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7523 /* __m128 is returned in xmm0. */
7524 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7525 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7528 /* Otherwise, the size must be exactly in [1248]. */
7529 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7533 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7535 #ifdef SUBTARGET_RETURN_IN_MEMORY
7536 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7538 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7542 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7543 return return_in_memory_ms_64 (type
, mode
);
7545 return return_in_memory_64 (type
, mode
);
7548 return return_in_memory_32 (type
, mode
);
7552 /* When returning SSE vector types, we have a choice of either
7553 (1) being abi incompatible with a -march switch, or
7554 (2) generating an error.
7555 Given no good solution, I think the safest thing is one warning.
7556 The user won't be able to use -Werror, but....
7558 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7559 called in response to actually generating a caller or callee that
7560 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7561 via aggregate_value_p for general type probing from tree-ssa. */
7564 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7566 static bool warnedsse
, warnedmmx
;
7568 if (!TARGET_64BIT
&& type
)
7570 /* Look at the return type of the function, not the function type. */
7571 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7573 if (!TARGET_SSE
&& !warnedsse
)
7576 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7579 warning (0, "SSE vector return without SSE enabled "
7584 if (!TARGET_MMX
&& !warnedmmx
)
7586 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7589 warning (0, "MMX vector return without MMX enabled "
7599 /* Create the va_list data type. */
7601 /* Returns the calling convention specific va_list date type.
7602 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7605 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7607 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7609 /* For i386 we use plain pointer to argument area. */
7610 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7611 return build_pointer_type (char_type_node
);
7613 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7614 type_decl
= build_decl (BUILTINS_LOCATION
,
7615 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7617 f_gpr
= build_decl (BUILTINS_LOCATION
,
7618 FIELD_DECL
, get_identifier ("gp_offset"),
7619 unsigned_type_node
);
7620 f_fpr
= build_decl (BUILTINS_LOCATION
,
7621 FIELD_DECL
, get_identifier ("fp_offset"),
7622 unsigned_type_node
);
7623 f_ovf
= build_decl (BUILTINS_LOCATION
,
7624 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7626 f_sav
= build_decl (BUILTINS_LOCATION
,
7627 FIELD_DECL
, get_identifier ("reg_save_area"),
7630 va_list_gpr_counter_field
= f_gpr
;
7631 va_list_fpr_counter_field
= f_fpr
;
7633 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7634 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7635 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7636 DECL_FIELD_CONTEXT (f_sav
) = record
;
7638 TYPE_STUB_DECL (record
) = type_decl
;
7639 TYPE_NAME (record
) = type_decl
;
7640 TYPE_FIELDS (record
) = f_gpr
;
7641 DECL_CHAIN (f_gpr
) = f_fpr
;
7642 DECL_CHAIN (f_fpr
) = f_ovf
;
7643 DECL_CHAIN (f_ovf
) = f_sav
;
7645 layout_type (record
);
7647 /* The correct type is an array type of one element. */
7648 return build_array_type (record
, build_index_type (size_zero_node
));
7651 /* Setup the builtin va_list data type and for 64-bit the additional
7652 calling convention specific va_list data types. */
7655 ix86_build_builtin_va_list (void)
7657 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7659 /* Initialize abi specific va_list builtin types. */
7663 if (ix86_abi
== MS_ABI
)
7665 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7666 if (TREE_CODE (t
) != RECORD_TYPE
)
7667 t
= build_variant_type_copy (t
);
7668 sysv_va_list_type_node
= t
;
7673 if (TREE_CODE (t
) != RECORD_TYPE
)
7674 t
= build_variant_type_copy (t
);
7675 sysv_va_list_type_node
= t
;
7677 if (ix86_abi
!= MS_ABI
)
7679 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7680 if (TREE_CODE (t
) != RECORD_TYPE
)
7681 t
= build_variant_type_copy (t
);
7682 ms_va_list_type_node
= t
;
7687 if (TREE_CODE (t
) != RECORD_TYPE
)
7688 t
= build_variant_type_copy (t
);
7689 ms_va_list_type_node
= t
;
7696 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7699 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7705 /* GPR size of varargs save area. */
7706 if (cfun
->va_list_gpr_size
)
7707 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7709 ix86_varargs_gpr_size
= 0;
7711 /* FPR size of varargs save area. We don't need it if we don't pass
7712 anything in SSE registers. */
7713 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7714 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7716 ix86_varargs_fpr_size
= 0;
7718 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
7721 save_area
= frame_pointer_rtx
;
7722 set
= get_varargs_alias_set ();
7724 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
7725 if (max
> X86_64_REGPARM_MAX
)
7726 max
= X86_64_REGPARM_MAX
;
7728 for (i
= cum
->regno
; i
< max
; i
++)
7730 mem
= gen_rtx_MEM (Pmode
,
7731 plus_constant (save_area
, i
* UNITS_PER_WORD
));
7732 MEM_NOTRAP_P (mem
) = 1;
7733 set_mem_alias_set (mem
, set
);
7734 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
7735 x86_64_int_parameter_registers
[i
]));
7738 if (ix86_varargs_fpr_size
)
7740 enum machine_mode smode
;
7743 /* Now emit code to save SSE registers. The AX parameter contains number
7744 of SSE parameter registers used to call this function, though all we
7745 actually check here is the zero/non-zero status. */
7747 label
= gen_label_rtx ();
7748 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
7749 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
7752 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7753 we used movdqa (i.e. TImode) instead? Perhaps even better would
7754 be if we could determine the real mode of the data, via a hook
7755 into pass_stdarg. Ignore all that for now. */
7757 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
7758 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
7760 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
7761 if (max
> X86_64_SSE_REGPARM_MAX
)
7762 max
= X86_64_SSE_REGPARM_MAX
;
7764 for (i
= cum
->sse_regno
; i
< max
; ++i
)
7766 mem
= plus_constant (save_area
, i
* 16 + ix86_varargs_gpr_size
);
7767 mem
= gen_rtx_MEM (smode
, mem
);
7768 MEM_NOTRAP_P (mem
) = 1;
7769 set_mem_alias_set (mem
, set
);
7770 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
7772 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
7780 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
7782 alias_set_type set
= get_varargs_alias_set ();
7785 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
7789 mem
= gen_rtx_MEM (Pmode
,
7790 plus_constant (virtual_incoming_args_rtx
,
7791 i
* UNITS_PER_WORD
));
7792 MEM_NOTRAP_P (mem
) = 1;
7793 set_mem_alias_set (mem
, set
);
7795 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
7796 emit_move_insn (mem
, reg
);
7801 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7802 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
7805 CUMULATIVE_ARGS next_cum
;
7808 /* This argument doesn't appear to be used anymore. Which is good,
7809 because the old code here didn't suppress rtl generation. */
7810 gcc_assert (!no_rtl
);
7815 fntype
= TREE_TYPE (current_function_decl
);
7817 /* For varargs, we do not want to skip the dummy va_dcl argument.
7818 For stdargs, we do want to skip the last named argument. */
7820 if (stdarg_p (fntype
))
7821 ix86_function_arg_advance (&next_cum
, mode
, type
, true);
7823 if (cum
->call_abi
== MS_ABI
)
7824 setup_incoming_varargs_ms_64 (&next_cum
);
7826 setup_incoming_varargs_64 (&next_cum
);
7829 /* Checks if TYPE is of kind va_list char *. */
7832 is_va_list_char_pointer (tree type
)
7836 /* For 32-bit it is always true. */
7839 canonic
= ix86_canonical_va_list_type (type
);
7840 return (canonic
== ms_va_list_type_node
7841 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
7844 /* Implement va_start. */
7847 ix86_va_start (tree valist
, rtx nextarg
)
7849 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
7850 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7851 tree gpr
, fpr
, ovf
, sav
, t
;
7855 if (flag_split_stack
7856 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7858 unsigned int scratch_regno
;
7860 /* When we are splitting the stack, we can't refer to the stack
7861 arguments using internal_arg_pointer, because they may be on
7862 the old stack. The split stack prologue will arrange to
7863 leave a pointer to the old stack arguments in a scratch
7864 register, which we here copy to a pseudo-register. The split
7865 stack prologue can't set the pseudo-register directly because
7866 it (the prologue) runs before any registers have been saved. */
7868 scratch_regno
= split_stack_prologue_scratch_regno ();
7869 if (scratch_regno
!= INVALID_REGNUM
)
7873 reg
= gen_reg_rtx (Pmode
);
7874 cfun
->machine
->split_stack_varargs_pointer
= reg
;
7877 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
7881 push_topmost_sequence ();
7882 emit_insn_after (seq
, entry_of_function ());
7883 pop_topmost_sequence ();
7887 /* Only 64bit target needs something special. */
7888 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7890 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7891 std_expand_builtin_va_start (valist
, nextarg
);
7896 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
7897 next
= expand_binop (ptr_mode
, add_optab
,
7898 cfun
->machine
->split_stack_varargs_pointer
,
7899 crtl
->args
.arg_offset_rtx
,
7900 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
7901 convert_move (va_r
, next
, 0);
7906 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7907 f_fpr
= DECL_CHAIN (f_gpr
);
7908 f_ovf
= DECL_CHAIN (f_fpr
);
7909 f_sav
= DECL_CHAIN (f_ovf
);
7911 valist
= build_simple_mem_ref (valist
);
7912 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
7913 /* The following should be folded into the MEM_REF offset. */
7914 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
7916 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
7918 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
7920 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
7923 /* Count number of gp and fp argument registers used. */
7924 words
= crtl
->args
.info
.words
;
7925 n_gpr
= crtl
->args
.info
.regno
;
7926 n_fpr
= crtl
->args
.info
.sse_regno
;
7928 if (cfun
->va_list_gpr_size
)
7930 type
= TREE_TYPE (gpr
);
7931 t
= build2 (MODIFY_EXPR
, type
,
7932 gpr
, build_int_cst (type
, n_gpr
* 8));
7933 TREE_SIDE_EFFECTS (t
) = 1;
7934 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7937 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7939 type
= TREE_TYPE (fpr
);
7940 t
= build2 (MODIFY_EXPR
, type
, fpr
,
7941 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
7942 TREE_SIDE_EFFECTS (t
) = 1;
7943 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7946 /* Find the overflow area. */
7947 type
= TREE_TYPE (ovf
);
7948 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7949 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
7951 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
7952 t
= make_tree (type
, ovf_rtx
);
7954 t
= build2 (POINTER_PLUS_EXPR
, type
, t
,
7955 size_int (words
* UNITS_PER_WORD
));
7956 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
7957 TREE_SIDE_EFFECTS (t
) = 1;
7958 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7960 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
7962 /* Find the register save area.
7963 Prologue of the function save it right above stack frame. */
7964 type
= TREE_TYPE (sav
);
7965 t
= make_tree (type
, frame_pointer_rtx
);
7966 if (!ix86_varargs_gpr_size
)
7967 t
= build2 (POINTER_PLUS_EXPR
, type
, t
,
7968 size_int (-8 * X86_64_REGPARM_MAX
));
7969 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
7970 TREE_SIDE_EFFECTS (t
) = 1;
7971 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7975 /* Implement va_arg. */
7978 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
7981 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
7982 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7983 tree gpr
, fpr
, ovf
, sav
, t
;
7985 tree lab_false
, lab_over
= NULL_TREE
;
7990 enum machine_mode nat_mode
;
7991 unsigned int arg_boundary
;
7993 /* Only 64bit target needs something special. */
7994 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7995 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
7997 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7998 f_fpr
= DECL_CHAIN (f_gpr
);
7999 f_ovf
= DECL_CHAIN (f_fpr
);
8000 f_sav
= DECL_CHAIN (f_ovf
);
8002 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
8003 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
8004 valist
= build_va_arg_indirect_ref (valist
);
8005 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
8006 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
8007 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
8009 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
8011 type
= build_pointer_type (type
);
8012 size
= int_size_in_bytes (type
);
8013 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
8015 nat_mode
= type_natural_mode (type
, NULL
);
8024 /* Unnamed 256bit vector mode parameters are passed on stack. */
8025 if (ix86_cfun_abi () == SYSV_ABI
)
8032 container
= construct_container (nat_mode
, TYPE_MODE (type
),
8033 type
, 0, X86_64_REGPARM_MAX
,
8034 X86_64_SSE_REGPARM_MAX
, intreg
,
8039 /* Pull the value out of the saved registers. */
8041 addr
= create_tmp_var (ptr_type_node
, "addr");
8045 int needed_intregs
, needed_sseregs
;
8047 tree int_addr
, sse_addr
;
8049 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
8050 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
8052 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
8054 need_temp
= (!REG_P (container
)
8055 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
8056 || TYPE_ALIGN (type
) > 128));
8058 /* In case we are passing structure, verify that it is consecutive block
8059 on the register save area. If not we need to do moves. */
8060 if (!need_temp
&& !REG_P (container
))
8062 /* Verify that all registers are strictly consecutive */
8063 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
8067 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8069 rtx slot
= XVECEXP (container
, 0, i
);
8070 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
8071 || INTVAL (XEXP (slot
, 1)) != i
* 16)
8079 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8081 rtx slot
= XVECEXP (container
, 0, i
);
8082 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
8083 || INTVAL (XEXP (slot
, 1)) != i
* 8)
8095 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
8096 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
8099 /* First ensure that we fit completely in registers. */
8102 t
= build_int_cst (TREE_TYPE (gpr
),
8103 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
8104 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
8105 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8106 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8107 gimplify_and_add (t
, pre_p
);
8111 t
= build_int_cst (TREE_TYPE (fpr
),
8112 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
8113 + X86_64_REGPARM_MAX
* 8);
8114 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
8115 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8116 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8117 gimplify_and_add (t
, pre_p
);
8120 /* Compute index to start of area used for integer regs. */
8123 /* int_addr = gpr + sav; */
8124 t
= fold_convert (sizetype
, gpr
);
8125 t
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, sav
, t
);
8126 gimplify_assign (int_addr
, t
, pre_p
);
8130 /* sse_addr = fpr + sav; */
8131 t
= fold_convert (sizetype
, fpr
);
8132 t
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, sav
, t
);
8133 gimplify_assign (sse_addr
, t
, pre_p
);
8137 int i
, prev_size
= 0;
8138 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8141 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8142 gimplify_assign (addr
, t
, pre_p
);
8144 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8146 rtx slot
= XVECEXP (container
, 0, i
);
8147 rtx reg
= XEXP (slot
, 0);
8148 enum machine_mode mode
= GET_MODE (reg
);
8154 tree dest_addr
, dest
;
8155 int cur_size
= GET_MODE_SIZE (mode
);
8157 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8158 prev_size
= INTVAL (XEXP (slot
, 1));
8159 if (prev_size
+ cur_size
> size
)
8161 cur_size
= size
- prev_size
;
8162 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8163 if (mode
== BLKmode
)
8166 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8167 if (mode
== GET_MODE (reg
))
8168 addr_type
= build_pointer_type (piece_type
);
8170 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8172 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8175 if (SSE_REGNO_P (REGNO (reg
)))
8177 src_addr
= sse_addr
;
8178 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8182 src_addr
= int_addr
;
8183 src_offset
= REGNO (reg
) * 8;
8185 src_addr
= fold_convert (addr_type
, src_addr
);
8186 src_addr
= fold_build2 (POINTER_PLUS_EXPR
, addr_type
, src_addr
,
8187 size_int (src_offset
));
8189 dest_addr
= fold_convert (daddr_type
, addr
);
8190 dest_addr
= fold_build2 (POINTER_PLUS_EXPR
, daddr_type
, dest_addr
,
8191 size_int (prev_size
));
8192 if (cur_size
== GET_MODE_SIZE (mode
))
8194 src
= build_va_arg_indirect_ref (src_addr
);
8195 dest
= build_va_arg_indirect_ref (dest_addr
);
8197 gimplify_assign (dest
, src
, pre_p
);
8202 = build_call_expr (implicit_built_in_decls
[BUILT_IN_MEMCPY
],
8203 3, dest_addr
, src_addr
,
8204 size_int (cur_size
));
8205 gimplify_and_add (copy
, pre_p
);
8207 prev_size
+= cur_size
;
8213 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8214 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8215 gimplify_assign (gpr
, t
, pre_p
);
8220 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8221 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8222 gimplify_assign (fpr
, t
, pre_p
);
8225 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8227 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8230 /* ... otherwise out of the overflow area. */
8232 /* When we align parameter on stack for caller, if the parameter
8233 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8234 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8235 here with caller. */
8236 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8237 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8238 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8240 /* Care for on-stack alignment if needed. */
8241 if (arg_boundary
<= 64 || size
== 0)
8245 HOST_WIDE_INT align
= arg_boundary
/ 8;
8246 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
8247 size_int (align
- 1));
8248 t
= fold_convert (sizetype
, t
);
8249 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8251 t
= fold_convert (TREE_TYPE (ovf
), t
);
8254 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8255 gimplify_assign (addr
, t
, pre_p
);
8257 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (t
), t
,
8258 size_int (rsize
* UNITS_PER_WORD
));
8259 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8262 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8264 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8265 addr
= fold_convert (ptrtype
, addr
);
8268 addr
= build_va_arg_indirect_ref (addr
);
8269 return build_va_arg_indirect_ref (addr
);
8272 /* Return true if OPNUM's MEM should be matched
8273 in movabs* patterns. */
8276 ix86_check_movabs (rtx insn
, int opnum
)
8280 set
= PATTERN (insn
);
8281 if (GET_CODE (set
) == PARALLEL
)
8282 set
= XVECEXP (set
, 0, 0);
8283 gcc_assert (GET_CODE (set
) == SET
);
8284 mem
= XEXP (set
, opnum
);
8285 while (GET_CODE (mem
) == SUBREG
)
8286 mem
= SUBREG_REG (mem
);
8287 gcc_assert (MEM_P (mem
));
8288 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8291 /* Initialize the table of extra 80387 mathematical constants. */
8294 init_ext_80387_constants (void)
8296 static const char * cst
[5] =
8298 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8299 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8300 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8301 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8302 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8306 for (i
= 0; i
< 5; i
++)
8308 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8309 /* Ensure each constant is rounded to XFmode precision. */
8310 real_convert (&ext_80387_constants_table
[i
],
8311 XFmode
, &ext_80387_constants_table
[i
]);
8314 ext_80387_constants_init
= 1;
8317 /* Return non-zero if the constant is something that
8318 can be loaded with a special instruction. */
8321 standard_80387_constant_p (rtx x
)
8323 enum machine_mode mode
= GET_MODE (x
);
8327 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
8330 if (x
== CONST0_RTX (mode
))
8332 if (x
== CONST1_RTX (mode
))
8335 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8337 /* For XFmode constants, try to find a special 80387 instruction when
8338 optimizing for size or on those CPUs that benefit from them. */
8340 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
8344 if (! ext_80387_constants_init
)
8345 init_ext_80387_constants ();
8347 for (i
= 0; i
< 5; i
++)
8348 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8352 /* Load of the constant -0.0 or -1.0 will be split as
8353 fldz;fchs or fld1;fchs sequence. */
8354 if (real_isnegzero (&r
))
8356 if (real_identical (&r
, &dconstm1
))
8362 /* Return the opcode of the special instruction to be used to load
/* NOTE(review): the extraction dropped the remainder of this comment and
   the entire switch body.  Presumably the cases map the index returned
   by standard_80387_constant_p to an fld* mnemonic string -- confirm
   against the complete source.  */
8366 standard_80387_constant_opcode (rtx x
)
8368 switch (standard_80387_constant_p (x
))
8392 /* Return the CONST_DOUBLE representing the 80387 constant that is
8393 loaded by the specified special instruction. The argument IDX
8394 matches the return value from standard_80387_constant_p. */
8397 standard_80387_constant_rtx (int idx
)
/* Lazily build the extended-constant table before indexing it.  */
8401 if (! ext_80387_constants_init
)
8402 init_ext_80387_constants ();
/* NOTE(review): the switch translating IDX into table index `i` was
   dropped by the extraction; `i` is presumably idx - 3 for the table
   entries -- confirm against the complete source.  */
8418 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8422 /* Return 1 if X is all 0s and 2 if x is all 1s
8423 in supported SSE vector mode. */
8426 standard_sse_constant_p (rtx x
)
8428 enum machine_mode mode
 = GET_MODE (x
);
/* All-zero vectors/constants can always be materialized with xor.  */
8430 if (x
 == const0_rtx
 || x
 == CONST0_RTX (GET_MODE (x
)))
/* NOTE(review): the mode checks guarding the all-ones (pcmpeq) case
   were dropped by the extraction -- confirm against the full source.  */
8432 if (vector_all_ones_operand (x
, mode
))
8448 /* Return the opcode of the special instruction to be used to load
/* Maps the result of standard_sse_constant_p (1 = all zeros,
   2 = all ones) plus INSN's attribute mode to the assembler template
   that materializes the constant without a memory load.
   NOTE(review): the case labels of both switches were dropped by the
   extraction; the grouping below is inferred from the surviving return
   statements -- confirm against the full source.  */
8452 standard_sse_constant_opcode (rtx insn
, rtx x
)
8454 switch (standard_sse_constant_p (x
))
8457 switch (get_attr_mode (insn
))
/* 128-bit all-zeros: xorps/xorpd/pxor depending on insn mode.  */
8460 return TARGET_AVX
 ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8462 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
8463 return TARGET_AVX
 ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8465 return TARGET_AVX
 ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
8467 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
8468 return TARGET_AVX
 ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8470 return TARGET_AVX
 ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
/* 256-bit all-zeros: AVX-only encodings on the %x (xmm) operand.  */
8472 return "vxorps\t%x0, %x0, %x0";
8474 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
8475 return "vxorps\t%x0, %x0, %x0";
8477 return "vxorpd\t%x0, %x0, %x0";
8479 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
8480 return "vxorps\t%x0, %x0, %x0";
8482 return "vpxor\t%x0, %x0, %x0";
/* All-ones: compare a register against itself.  */
8487 return TARGET_AVX
 ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
8494 /* Returns true if OP contains a symbol reference */
8497 symbolic_reference_mentioned_p (rtx op
)
8502 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
8505 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8506 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
8512 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8513 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
8517 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8524 /* Return true if it is appropriate to emit `ret' instructions in the
8525 body of a function. Do this only if the epilogue is simple, needing a
8526 couple of insns. Prior to reloading, we can't tell how many registers
8527 must be saved, so return false then. Return false if there is no frame
8528 marker to de-allocate. */
8531 ix86_can_use_return_insn_p (void)
8533 struct ix86_frame frame
;
8535 if (! reload_completed
|| frame_pointer_needed
)
8538 /* Don't allow more than 32k pop, since that's all we can do
8539 with one instruction. */
8540 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
8543 ix86_compute_frame_layout (&frame
);
8544 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8545 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8548 /* Value should be nonzero if functions must have frame pointers.
8549 Zero means the frame pointer need not be set up (and parms may
8550 be accessed via the stack pointer) in functions that seem suitable. */
8553 ix86_frame_pointer_required (void)
8555 /* If we accessed previous frames, then the generated code expects
8556 to be able to access the saved ebp value in our frame. */
8557 if (cfun
->machine
->accesses_prev_frame
)
8560 /* Several x86 os'es need a frame pointer for other reasons,
8561 usually pertaining to setjmp. */
8562 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8565 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8566 turns off the frame pointer by default. Turn it back on now if
8567 we've not got a leaf function. */
8568 if (TARGET_OMIT_LEAF_FRAME_POINTER
8569 && (!current_function_is_leaf
8570 || ix86_current_function_calls_tls_descriptor
))
8573 if (crtl
->profile
&& !flag_fentry
)
8579 /* Record that the current function accesses previous call frames. */
8582 ix86_setup_frame_addresses (void)
8584 cfun
->machine
->accesses_prev_frame
= 1;
8587 #ifndef USE_HIDDEN_LINKONCE
8588 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
8589 # define USE_HIDDEN_LINKONCE 1
8591 # define USE_HIDDEN_LINKONCE 0
8595 static int pic_labels_used
;
8597 /* Fills in the label name that should be used for a pc thunk for
8598 the given register. */
8601 get_pc_thunk_name (char name
[32], unsigned int regno
)
8603 gcc_assert (!TARGET_64BIT
);
8605 if (USE_HIDDEN_LINKONCE
)
8606 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
8608 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
8612 /* This function generates code for -fpic that loads %ebx with
8613 the return address of the caller and then returns. */
/* Implement TARGET_ASM_CODE_END: emit the pc-thunk functions recorded in
   pic_labels_used (one per candidate register) at the end of the file,
   and the split-stack file-end marker if needed.
   NOTE(review): the extraction dropped braces, declarations and the
   TARGET_MACHO/USE_HIDDEN_LINKONCE conditional structure from this
   function -- confirm control flow against the complete source.  */
8616 ix86_code_end (void)
8621 for (regno
 = AX_REG
; regno
 <= SP_REG
; regno
++)
8626 if (!(pic_labels_used
 & (1 << regno
)))
8629 get_pc_thunk_name (name
, regno
);
/* Build a bare FUNCTION_DECL for the thunk so it can be assembled.  */
8631 decl
 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
8632 get_identifier (name
),
8633 build_function_type (void_type_node
, void_list_node
));
8634 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
8635 NULL_TREE
, void_type_node
);
8636 TREE_PUBLIC (decl
) = 1;
8637 TREE_STATIC (decl
) = 1;
/* Mach-O path: weak, private-extern coalesced definition.  */
8642 switch_to_section (darwin_sections
[text_coal_section
]);
8643 fputs ("\t.weak_definition\t", asm_out_file
);
8644 assemble_name (asm_out_file
, name
);
8645 fputs ("\n\t.private_extern\t", asm_out_file
);
8646 assemble_name (asm_out_file
, name
);
8647 putc ('\n', asm_out_file
);
8648 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8649 DECL_WEAK (decl
) = 1;
/* ELF path: hidden COMDAT section when supported.  */
8653 if (USE_HIDDEN_LINKONCE
)
8655 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
8657 targetm
.asm_out
.unique_section (decl
, 0);
8658 switch_to_section (get_named_section (decl
, NULL
, 0));
8660 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
8661 fputs ("\t.hidden\t", asm_out_file
);
8662 assemble_name (asm_out_file
, name
);
8663 putc ('\n', asm_out_file
);
8664 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
8668 switch_to_section (text_section
);
8669 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8672 DECL_INITIAL (decl
) = make_node (BLOCK
);
8673 current_function_decl
 = decl
;
8674 init_function_start (decl
);
8675 first_function_block_is_cold
 = false;
8676 /* Make sure unwind info is emitted for the thunk if needed. */
8677 final_start_function (emit_barrier (), asm_out_file
, 1);
8679 /* Pad stack IP move with 4 instructions (two NOPs count
8680 as one instruction). */
8681 if (TARGET_PAD_SHORT_FUNCTION
)
8686 fputs ("\tnop\n", asm_out_file
);
/* The thunk body: load the return address into the target register.  */
8689 xops
[0] = gen_rtx_REG (Pmode
, regno
);
8690 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
8691 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
8692 fputs ("\tret\n", asm_out_file
);
8693 final_end_function ();
8694 init_insn_lengths ();
8695 free_after_compilation (cfun
);
8697 current_function_decl
 = NULL
;
8700 if (flag_split_stack
)
8701 file_end_indicate_split_stack ();
8704 /* Emit code for the SET_GOT patterns. */
/* Emit assembly for the SET_GOT patterns: load the address of the GOT
   (or, on VxWorks RTP, the GOTT base/index) into register DEST, using
   either the call/pop idiom or the pc-thunk call depending on
   TARGET_DEEP_BRANCH_PREDICTION.
   NOTE(review): the extraction dropped braces and parts of the
   TARGET_MACHO / DWARF2_UNWIND_INFO conditional structure -- confirm
   control flow against the complete source.  */
8707 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
8713 if (TARGET_VXWORKS_RTP
 && flag_pic
)
8715 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8716 xops
[2] = gen_rtx_MEM (Pmode
,
8717 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
8718 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
8720 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8721 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8722 an unadorned address. */
8723 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8724 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
8725 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
8729 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
/* Without deep branch prediction, use the call/pop idiom inline.  */
8731 if (! TARGET_DEEP_BRANCH_PREDICTION
 || !flag_pic
)
8733 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
 ? label
 : gen_label_rtx ());
8736 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
8739 output_asm_insn ("call\t%a2", xops
);
8740 #ifdef DWARF2_UNWIND_INFO
8741 /* The call to next label acts as a push. */
8742 if (dwarf2out_do_frame ())
8746 insn
 = emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
8747 gen_rtx_PLUS (Pmode
,
8750 RTX_FRAME_RELATED_P (insn
) = 1;
8751 dwarf2out_frame_debug (insn
, true);
8758 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8759 is what will be referenced by the Mach-O PIC subsystem. */
8761 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8764 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8765 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
8769 output_asm_insn ("pop%z0\t%0", xops
);
8770 #ifdef DWARF2_UNWIND_INFO
8771 /* The pop is a pop and clobbers dest, but doesn't restore it
8772 for unwind info purposes. */
8773 if (dwarf2out_do_frame ())
8777 insn
 = emit_insn (gen_rtx_SET (VOIDmode
, dest
, const0_rtx
));
8778 dwarf2out_frame_debug (insn
, true);
8779 insn
 = emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
8780 gen_rtx_PLUS (Pmode
,
8783 RTX_FRAME_RELATED_P (insn
) = 1;
8784 dwarf2out_frame_debug (insn
, true);
/* Deep-branch-prediction path: call a shared pc thunk instead.  */
8793 get_pc_thunk_name (name
, REGNO (dest
));
8794 pic_labels_used
 |= 1 << REGNO (dest
);
8796 #ifdef DWARF2_UNWIND_INFO
8797 /* Ensure all queued register saves are flushed before the
8799 if (dwarf2out_do_frame ())
8800 dwarf2out_flush_queued_reg_saves ();
8802 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
8803 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
8804 output_asm_insn ("call\t%X2", xops
);
8805 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8806 is what will be referenced by the Mach-O PIC subsystem. */
8809 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8811 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8812 CODE_LABEL_NUMBER (label
));
/* Finally add the GOT offset to DEST.  */
8819 if (!flag_pic
 || TARGET_DEEP_BRANCH_PREDICTION
)
8820 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
8822 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
8827 /* Generate an "push" pattern for input ARG. */
8832 struct machine_function
*m
= cfun
->machine
;
8834 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8835 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8836 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
8838 return gen_rtx_SET (VOIDmode
,
8840 gen_rtx_PRE_DEC (Pmode
,
8841 stack_pointer_rtx
)),
8845 /* Generate an "pop" pattern for input ARG. */
8850 return gen_rtx_SET (VOIDmode
,
8853 gen_rtx_POST_INC (Pmode
,
8854 stack_pointer_rtx
)));
8857 /* Return >= 0 if there is an unused call-clobbered register available
8858 for the entire function. */
8861 ix86_select_alt_pic_regnum (void)
8863 if (current_function_is_leaf
8865 && !ix86_current_function_calls_tls_descriptor
)
8868 /* Can't use the same register for both PIC and DRAP. */
8870 drap
= REGNO (crtl
->drap_reg
);
8873 for (i
= 2; i
>= 0; --i
)
8874 if (i
!= drap
&& !df_regs_ever_live_p (i
))
8878 return INVALID_REGNUM
;
8881 /* Return 1 if we need to save REGNO. */
8883 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
8885 if (pic_offset_table_rtx
8886 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
8887 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
8889 || crtl
->calls_eh_return
8890 || crtl
->uses_const_pool
))
8892 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
8897 if (crtl
->calls_eh_return
&& maybe_eh_return
)
8902 unsigned test
= EH_RETURN_DATA_REGNO (i
);
8903 if (test
== INVALID_REGNUM
)
8910 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
8913 return (df_regs_ever_live_p (regno
)
8914 && !call_used_regs
[regno
]
8915 && !fixed_regs
[regno
]
8916 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
8919 /* Return number of saved general prupose registers. */
8922 ix86_nsaved_regs (void)
8927 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8928 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8933 /* Return number of saved SSE registrers. */
8936 ix86_nsaved_sseregs (void)
8941 if (ix86_cfun_abi () != MS_ABI
)
8943 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8944 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8949 /* Given FROM and TO register numbers, say whether this elimination is
8950 allowed. If stack alignment is needed, we can only replace argument
8951 pointer with hard frame pointer, or replace frame pointer with stack
8952 pointer. Otherwise, frame pointer elimination is automatically
8953 handled and all other eliminations are valid. */
8956 ix86_can_eliminate (const int from
, const int to
)
8958 if (stack_realign_fp
)
8959 return ((from
== ARG_POINTER_REGNUM
8960 && to
== HARD_FRAME_POINTER_REGNUM
)
8961 || (from
== FRAME_POINTER_REGNUM
8962 && to
== STACK_POINTER_REGNUM
));
8964 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
8967 /* Return the offset between two registers, one to be eliminated, and the other
8968 its replacement, at the start of a routine. */
8971 ix86_initial_elimination_offset (int from
, int to
)
8973 struct ix86_frame frame
;
8974 ix86_compute_frame_layout (&frame
);
8976 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
8977 return frame
.hard_frame_pointer_offset
;
8978 else if (from
== FRAME_POINTER_REGNUM
8979 && to
== HARD_FRAME_POINTER_REGNUM
)
8980 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
8983 gcc_assert (to
== STACK_POINTER_REGNUM
);
8985 if (from
== ARG_POINTER_REGNUM
)
8986 return frame
.stack_pointer_offset
;
8988 gcc_assert (from
== FRAME_POINTER_REGNUM
);
8989 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
8993 /* In a dynamically-aligned function, we can't know the offset from
8994 stack pointer to frame pointer, so we must ensure that setjmp
8995 eliminates fp against the hard fp (%ebp) rather than trying to
8996 index from %esp up to the top of the frame across a gap that is
8997 of unknown (at compile-time) size. */
8999 ix86_builtin_setjmp_frame_value (void)
9001 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
9004 /* On the x86 -fsplit-stack and -fstack-protector both use the same
9005 field in the TCB, so they can not be used together. */
/* Implements TARGET_SUPPORTS_SPLIT_STACK.  NOTE(review): the extraction
   dropped the #else branch, the `ret` result plumbing, and the `report`
   guards around the error calls -- confirm against the full source.  */
9008 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED
,
9009 struct gcc_options
 *opts ATTRIBUTE_UNUSED
)
/* Split stack requires a known TCB offset (GNU/Linux only here).  */
9013 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
9015 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
9018 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE
)
9021 error ("%<-fsplit-stack%> requires "
9022 "assembler support for CFI directives");
9030 /* When using -fsplit-stack, the allocation routines set a field in
9031 the TCB to the bottom of the stack plus this much space, measured
9034 #define SPLIT_STACK_AVAILABLE 256
9036 /* Fill structure ix86_frame about frame of currently computed function. */
9039 ix86_compute_frame_layout (struct ix86_frame
*frame
)
9041 unsigned int stack_alignment_needed
;
9042 HOST_WIDE_INT offset
;
9043 unsigned int preferred_alignment
;
9044 HOST_WIDE_INT size
= get_frame_size ();
9045 HOST_WIDE_INT to_allocate
;
9047 frame
->nregs
= ix86_nsaved_regs ();
9048 frame
->nsseregs
= ix86_nsaved_sseregs ();
9050 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
9051 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
9053 /* MS ABI seem to require stack alignment to be always 16 except for function
9054 prologues and leaf. */
9055 if ((ix86_cfun_abi () == MS_ABI
&& preferred_alignment
< 16)
9056 && (!current_function_is_leaf
|| cfun
->calls_alloca
!= 0
9057 || ix86_current_function_calls_tls_descriptor
))
9059 preferred_alignment
= 16;
9060 stack_alignment_needed
= 16;
9061 crtl
->preferred_stack_boundary
= 128;
9062 crtl
->stack_alignment_needed
= 128;
9065 gcc_assert (!size
|| stack_alignment_needed
);
9066 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
9067 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
9069 /* For SEH we have to limit the amount of code movement into the prologue.
9070 At present we do this via a BLOCKAGE, at which point there's very little
9071 scheduling that can be done, which means that there's very little point
9072 in doing anything except PUSHs. */
9074 cfun
->machine
->use_fast_prologue_epilogue
= false;
9076 /* During reload iteration the amount of registers saved can change.
9077 Recompute the value as needed. Do not recompute when amount of registers
9078 didn't change as reload does multiple calls to the function and does not
9079 expect the decision to change within single iteration. */
9080 else if (!optimize_function_for_size_p (cfun
)
9081 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
9083 int count
= frame
->nregs
;
9084 struct cgraph_node
*node
= cgraph_node (current_function_decl
);
9086 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
9088 /* The fast prologue uses move instead of push to save registers. This
9089 is significantly longer, but also executes faster as modern hardware
9090 can execute the moves in parallel, but can't do that for push/pop.
9092 Be careful about choosing what prologue to emit: When function takes
9093 many instructions to execute we may use slow version as well as in
9094 case function is known to be outside hot spot (this is known with
9095 feedback only). Weight the size of function by number of registers
9096 to save as it is cheap to use one or two push instructions but very
9097 slow to use many of them. */
9099 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
9100 if (node
->frequency
< NODE_FREQUENCY_NORMAL
9101 || (flag_branch_probabilities
9102 && node
->frequency
< NODE_FREQUENCY_HOT
))
9103 cfun
->machine
->use_fast_prologue_epilogue
= false;
9105 cfun
->machine
->use_fast_prologue_epilogue
9106 = !expensive_function_p (count
);
9108 if (TARGET_PROLOGUE_USING_MOVE
9109 && cfun
->machine
->use_fast_prologue_epilogue
)
9110 frame
->save_regs_using_mov
= true;
9112 frame
->save_regs_using_mov
= false;
9114 /* If static stack checking is enabled and done with probes, the registers
9115 need to be saved before allocating the frame. */
9116 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
9117 frame
->save_regs_using_mov
= false;
9119 /* Skip return address. */
9120 offset
= UNITS_PER_WORD
;
9122 /* Skip pushed static chain. */
9123 if (ix86_static_chain_on_stack
)
9124 offset
+= UNITS_PER_WORD
;
9126 /* Skip saved base pointer. */
9127 if (frame_pointer_needed
)
9128 offset
+= UNITS_PER_WORD
;
9129 frame
->hfp_save_offset
= offset
;
9131 /* The traditional frame pointer location is at the top of the frame. */
9132 frame
->hard_frame_pointer_offset
= offset
;
9134 /* Register save area */
9135 offset
+= frame
->nregs
* UNITS_PER_WORD
;
9136 frame
->reg_save_offset
= offset
;
9138 /* Align and set SSE register save area. */
9139 if (frame
->nsseregs
)
9141 /* The only ABI that has saved SSE registers (Win64) also has a
9142 16-byte aligned default stack, and thus we don't need to be
9143 within the re-aligned local stack frame to save them. */
9144 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
9145 offset
= (offset
+ 16 - 1) & -16;
9146 offset
+= frame
->nsseregs
* 16;
9148 frame
->sse_reg_save_offset
= offset
;
9150 /* The re-aligned stack starts here. Values before this point are not
9151 directly comparable with values below this point. In order to make
9152 sure that no value happens to be the same before and after, force
9153 the alignment computation below to add a non-zero value. */
9154 if (stack_realign_fp
)
9155 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
9158 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
9159 offset
+= frame
->va_arg_size
;
9161 /* Align start of frame for local function. */
9162 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
9164 /* Frame pointer points here. */
9165 frame
->frame_pointer_offset
= offset
;
9169 /* Add outgoing arguments area. Can be skipped if we eliminated
9170 all the function calls as dead code.
9171 Skipping is however impossible when function calls alloca. Alloca
9172 expander assumes that last crtl->outgoing_args_size
9173 of stack frame are unused. */
9174 if (ACCUMULATE_OUTGOING_ARGS
9175 && (!current_function_is_leaf
|| cfun
->calls_alloca
9176 || ix86_current_function_calls_tls_descriptor
))
9178 offset
+= crtl
->outgoing_args_size
;
9179 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
9182 frame
->outgoing_arguments_size
= 0;
9184 /* Align stack boundary. Only needed if we're calling another function
9186 if (!current_function_is_leaf
|| cfun
->calls_alloca
9187 || ix86_current_function_calls_tls_descriptor
)
9188 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
9190 /* We've reached end of stack frame. */
9191 frame
->stack_pointer_offset
= offset
;
9193 /* Size prologue needs to allocate. */
9194 to_allocate
= offset
- frame
->sse_reg_save_offset
;
9196 if ((!to_allocate
&& frame
->nregs
<= 1)
9197 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
9198 frame
->save_regs_using_mov
= false;
9200 if (ix86_using_red_zone ()
9201 && current_function_sp_is_unchanging
9202 && current_function_is_leaf
9203 && !ix86_current_function_calls_tls_descriptor
)
9205 frame
->red_zone_size
= to_allocate
;
9206 if (frame
->save_regs_using_mov
)
9207 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9208 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9209 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9212 frame
->red_zone_size
= 0;
9213 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9215 /* The SEH frame pointer location is near the bottom of the frame.
9216 This is enforced by the fact that the difference between the
9217 stack pointer and the frame pointer is limited to 240 bytes in
9218 the unwind data structure. */
9223 /* If we can leave the frame pointer where it is, do so. */
9224 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9225 if (diff
> 240 || (diff
& 15) != 0)
9227 /* Ideally we'd determine what portion of the local stack frame
9228 (within the constraint of the lowest 240) is most heavily used.
9229 But without that complication, simply bias the frame pointer
9230 by 128 bytes so as to maximize the amount of the local stack
9231 frame that is addressable with 8-bit offsets. */
9232 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
9237 /* This is semi-inlined memory_address_length, but simplified
9238 since we know that we're always dealing with reg+offset, and
9239 to avoid having to create and discard all that rtl. */
9242 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
9248 /* EBP and R13 cannot be encoded without an offset. */
9249 len
= (regno
== BP_REG
|| regno
== R13_REG
);
9251 else if (IN_RANGE (offset
, -128, 127))
9254 /* ESP and R12 must be encoded with a SIB byte. */
9255 if (regno
== SP_REG
|| regno
== R12_REG
)
9261 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9262 The valid base registers are taken from CFUN->MACHINE->FS. */
9265 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9267 const struct machine_function
*m
= cfun
->machine
;
9268 rtx base_reg
= NULL
;
9269 HOST_WIDE_INT base_offset
= 0;
9271 if (m
->use_fast_prologue_epilogue
)
9273 /* Choose the base register most likely to allow the most scheduling
9274 opportunities. Generally FP is valid througout the function,
9275 while DRAP must be reloaded within the epilogue. But choose either
9276 over the SP due to increased encoding size. */
9280 base_reg
= hard_frame_pointer_rtx
;
9281 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9283 else if (m
->fs
.drap_valid
)
9285 base_reg
= crtl
->drap_reg
;
9286 base_offset
= 0 - cfa_offset
;
9288 else if (m
->fs
.sp_valid
)
9290 base_reg
= stack_pointer_rtx
;
9291 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9296 HOST_WIDE_INT toffset
;
9299 /* Choose the base register with the smallest address encoding.
9300 With a tie, choose FP > DRAP > SP. */
9303 base_reg
= stack_pointer_rtx
;
9304 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9305 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9307 if (m
->fs
.drap_valid
)
9309 toffset
= 0 - cfa_offset
;
9310 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9313 base_reg
= crtl
->drap_reg
;
9314 base_offset
= toffset
;
9320 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9321 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9324 base_reg
= hard_frame_pointer_rtx
;
9325 base_offset
= toffset
;
9330 gcc_assert (base_reg
!= NULL
);
9332 return plus_constant (base_reg
, base_offset
);
9335 /* Emit code to save registers in the prologue. */
9338 ix86_emit_save_regs (void)
9343 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9344 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9346 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
9347 RTX_FRAME_RELATED_P (insn
) = 1;
9351 /* Emit a single register save at CFA - CFA_OFFSET. */
9354 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9355 HOST_WIDE_INT cfa_offset
)
9357 struct machine_function
*m
= cfun
->machine
;
9358 rtx reg
= gen_rtx_REG (mode
, regno
);
9359 rtx mem
, addr
, base
, insn
;
9361 addr
= choose_baseaddr (cfa_offset
);
9362 mem
= gen_frame_mem (mode
, addr
);
9364 /* For SSE saves, we need to indicate the 128-bit alignment. */
9365 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9367 insn
= emit_move_insn (mem
, reg
);
9368 RTX_FRAME_RELATED_P (insn
) = 1;
9371 if (GET_CODE (base
) == PLUS
)
9372 base
= XEXP (base
, 0);
9373 gcc_checking_assert (REG_P (base
));
9375 /* When saving registers into a re-aligned local stack frame, avoid
9376 any tricky guessing by dwarf2out. */
9377 if (m
->fs
.realigned
)
9379 gcc_checking_assert (stack_realign_drap
);
9381 if (regno
== REGNO (crtl
->drap_reg
))
9383 /* A bit of a hack. We force the DRAP register to be saved in
9384 the re-aligned stack frame, which provides us with a copy
9385 of the CFA that will last past the prologue. Install it. */
9386 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9387 addr
= plus_constant (hard_frame_pointer_rtx
,
9388 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9389 mem
= gen_rtx_MEM (mode
, addr
);
9390 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9394 /* The frame pointer is a stable reference within the
9395 aligned frame. Use it. */
9396 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9397 addr
= plus_constant (hard_frame_pointer_rtx
,
9398 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9399 mem
= gen_rtx_MEM (mode
, addr
);
9400 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9401 gen_rtx_SET (VOIDmode
, mem
, reg
));
9405 /* The memory may not be relative to the current CFA register,
9406 which means that we may need to generate a new pattern for
9407 use by the unwind info. */
9408 else if (base
!= m
->fs
.cfa_reg
)
9410 addr
= plus_constant (m
->fs
.cfa_reg
, m
->fs
.cfa_offset
- cfa_offset
);
9411 mem
= gen_rtx_MEM (mode
, addr
);
9412 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
9416 /* Emit code to save registers using MOV insns.
9417 First register is stored at CFA - CFA_OFFSET. */
9419 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9423 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9424 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9426 ix86_emit_save_reg_using_mov (Pmode
, regno
, cfa_offset
);
9427 cfa_offset
-= UNITS_PER_WORD
;
9431 /* Emit code to save SSE registers using MOV insns.
9432 First register is stored at CFA - CFA_OFFSET. */
9434 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9438 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9439 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9441 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
9446 static GTY(()) rtx queued_cfa_restores
;
9448 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9449 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9450 Don't add the note if the previously saved value will be left untouched
9451 within stack red-zone till return, as unwinders can find the same value
9452 in the register and on the stack. */
9455 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
9457 if (cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9462 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9463 RTX_FRAME_RELATED_P (insn
) = 1;
9467 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
9470 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9473 ix86_add_queued_cfa_restore_notes (rtx insn
)
9476 if (!queued_cfa_restores
)
9478 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9480 XEXP (last
, 1) = REG_NOTES (insn
);
9481 REG_NOTES (insn
) = queued_cfa_restores
;
9482 queued_cfa_restores
= NULL_RTX
;
9483 RTX_FRAME_RELATED_P (insn
) = 1;
9486 /* Expand prologue or epilogue stack adjustment.
9487 The pattern exist to put a dependency on all ebp-based memory accesses.
9488 STYLE should be negative if instructions should be marked as frame related,
9489 zero if %r11 register is live and cannot be freely used and positive
9493 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9494 int style
, bool set_cfa
)
9496 struct machine_function
*m
= cfun
->machine
;
9498 bool add_frame_related_expr
= false;
9501 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9502 else if (x86_64_immediate_operand (offset
, DImode
))
9503 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
9507 /* r11 is used by indirect sibcall return as well, set before the
9508 epilogue and used after the epilogue. */
9510 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9513 gcc_assert (src
!= hard_frame_pointer_rtx
9514 && dest
!= hard_frame_pointer_rtx
);
9515 tmp
= hard_frame_pointer_rtx
;
9517 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9519 add_frame_related_expr
= true;
9521 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9524 insn
= emit_insn (insn
);
9526 ix86_add_queued_cfa_restore_notes (insn
);
9532 gcc_assert (m
->fs
.cfa_reg
== src
);
9533 m
->fs
.cfa_offset
+= INTVAL (offset
);
9534 m
->fs
.cfa_reg
= dest
;
9536 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9537 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9538 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9539 RTX_FRAME_RELATED_P (insn
) = 1;
9543 RTX_FRAME_RELATED_P (insn
) = 1;
9544 if (add_frame_related_expr
)
9546 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9547 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9548 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
9552 if (dest
== stack_pointer_rtx
)
9554 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9555 bool valid
= m
->fs
.sp_valid
;
9557 if (src
== hard_frame_pointer_rtx
)
9559 valid
= m
->fs
.fp_valid
;
9560 ooffset
= m
->fs
.fp_offset
;
9562 else if (src
== crtl
->drap_reg
)
9564 valid
= m
->fs
.drap_valid
;
9569 /* Else there are two possibilities: SP itself, which we set
9570 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9571 taken care of this by hand along the eh_return path. */
9572 gcc_checking_assert (src
== stack_pointer_rtx
9573 || offset
== const0_rtx
);
9576 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9577 m
->fs
.sp_valid
= valid
;
9581 /* Find an available register to be used as dynamic realign argument
9582 pointer regsiter. Such a register will be written in prologue and
9583 used in begin of body, so it must not be
9584 1. parameter passing register.
9586 We reuse static-chain register if it is available. Otherwise, we
9587 use DI for i386 and R13 for x86-64. We chose R13 since it has
9590 Return: the regno of chosen register. */
9593 find_drap_reg (void)
9595 tree decl
= cfun
->decl
;
9599 /* Use R13 for nested function or function need static chain.
9600 Since function with tail call may use any caller-saved
9601 registers in epilogue, DRAP must not use caller-saved
9602 register in such case. */
9603 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9610 /* Use DI for nested function or function need static chain.
9611 Since function with tail call may use any caller-saved
9612 registers in epilogue, DRAP must not use caller-saved
9613 register in such case. */
9614 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9617 /* Reuse static chain register if it isn't used for parameter
9619 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2
9620 && !lookup_attribute ("fastcall",
9621 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))
9622 && !lookup_attribute ("thiscall",
9623 TYPE_ATTRIBUTES (TREE_TYPE (decl
))))
9630 /* Return minimum incoming stack alignment. */
9633 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9635 unsigned int incoming_stack_boundary
;
9637 /* Prefer the one specified at command line. */
9638 if (ix86_user_incoming_stack_boundary
)
9639 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9640 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9641 if -mstackrealign is used, it isn't used for sibcall check and
9642 estimated stack alignment is 128bit. */
9645 && ix86_force_align_arg_pointer
9646 && crtl
->stack_alignment_estimated
== 128)
9647 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9649 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9651 /* Incoming stack alignment can be changed on individual functions
9652 via force_align_arg_pointer attribute. We use the smallest
9653 incoming stack boundary. */
9654 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
9655 && lookup_attribute (ix86_force_align_arg_pointer_string
,
9656 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
9657 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9659 /* The incoming stack frame has to be aligned at least at
9660 parm_stack_boundary. */
9661 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
9662 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
9664 /* Stack at entrance of main is aligned by runtime. We use the
9665 smallest incoming stack boundary. */
9666 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
9667 && DECL_NAME (current_function_decl
)
9668 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
9669 && DECL_FILE_SCOPE_P (current_function_decl
))
9670 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
9672 return incoming_stack_boundary
;
9675 /* Update incoming stack boundary and estimated stack alignment. */
9678 ix86_update_stack_boundary (void)
9680 ix86_incoming_stack_boundary
9681 = ix86_minimum_incoming_stack_boundary (false);
9683 /* x86_64 vararg needs 16byte stack alignment for register save
9687 && crtl
->stack_alignment_estimated
< 128)
9688 crtl
->stack_alignment_estimated
= 128;
9691 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9692 needed or an rtx for DRAP otherwise. */
9695 ix86_get_drap_rtx (void)
9697 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
9698 crtl
->need_drap
= true;
9700 if (stack_realign_drap
)
9702 /* Assign DRAP to vDRAP and returns vDRAP */
9703 unsigned int regno
= find_drap_reg ();
9708 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
9709 crtl
->drap_reg
= arg_ptr
;
9712 drap_vreg
= copy_to_reg (arg_ptr
);
9716 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
9719 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
9720 RTX_FRAME_RELATED_P (insn
) = 1;
9728 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9731 ix86_internal_arg_pointer (void)
9733 return virtual_incoming_args_rtx
;
9736 struct scratch_reg
{
9741 /* Return a short-lived scratch register for use on function entry.
9742 In 32-bit mode, it is valid only after the registers are saved
9743 in the prologue. This register must be released by means of
9744 release_scratch_register_on_entry once it is dead. */
9747 get_scratch_register_on_entry (struct scratch_reg
*sr
)
9755 /* We always use R11 in 64-bit mode. */
9760 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
9762 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9763 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
9764 int regparm
= ix86_function_regparm (fntype
, decl
);
9766 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
9768 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9769 for the static chain register. */
9770 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
9771 && drap_regno
!= AX_REG
)
9773 else if (regparm
< 2 && drap_regno
!= DX_REG
)
9775 /* ecx is the static chain register. */
9776 else if (regparm
< 3 && !fastcall_p
&& !static_chain_p
9777 && drap_regno
!= CX_REG
)
9779 else if (ix86_save_reg (BX_REG
, true))
9781 /* esi is the static chain register. */
9782 else if (!(regparm
== 3 && static_chain_p
)
9783 && ix86_save_reg (SI_REG
, true))
9785 else if (ix86_save_reg (DI_REG
, true))
9789 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
9794 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
9797 rtx insn
= emit_insn (gen_push (sr
->reg
));
9798 RTX_FRAME_RELATED_P (insn
) = 1;
9802 /* Release a scratch register obtained from the preceding function. */
9805 release_scratch_register_on_entry (struct scratch_reg
*sr
)
9809 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
9811 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9812 RTX_FRAME_RELATED_P (insn
) = 1;
9813 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
9814 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
9815 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
/* Distance between consecutive stack probes, in bytes.  */
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9821 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9824 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
9826 /* We skip the probe for the first interval + a small dope of 4 words and
9827 probe that many bytes past the specified size to maintain a protection
9828 area at the botton of the stack. */
9829 const int dope
= 4 * UNITS_PER_WORD
;
9830 rtx size_rtx
= GEN_INT (size
);
9832 /* See if we have a constant small number of probes to generate. If so,
9833 that's the easy case. The run-time loop is made up of 11 insns in the
9834 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9835 for n # of intervals. */
9836 if (size
<= 5 * PROBE_INTERVAL
)
9838 HOST_WIDE_INT i
, adjust
;
9839 bool first_probe
= true;
9841 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9842 values of N from 1 until it exceeds SIZE. If only one probe is
9843 needed, this will not generate any code. Then adjust and probe
9844 to PROBE_INTERVAL + SIZE. */
9845 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9849 adjust
= 2 * PROBE_INTERVAL
+ dope
;
9850 first_probe
= false;
9853 adjust
= PROBE_INTERVAL
;
9855 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9856 plus_constant (stack_pointer_rtx
, -adjust
)));
9857 emit_stack_probe (stack_pointer_rtx
);
9861 adjust
= size
+ PROBE_INTERVAL
+ dope
;
9863 adjust
= size
+ PROBE_INTERVAL
- i
;
9865 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9866 plus_constant (stack_pointer_rtx
, -adjust
)));
9867 emit_stack_probe (stack_pointer_rtx
);
9869 /* Adjust back to account for the additional first interval. */
9870 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9871 plus_constant (stack_pointer_rtx
,
9872 PROBE_INTERVAL
+ dope
)));
9875 /* Otherwise, do the same as above, but in a loop. Note that we must be
9876 extra careful with variables wrapping around because we might be at
9877 the very top (or the very bottom) of the address space and we have
9878 to be able to handle this case properly; in particular, we use an
9879 equality test for the loop condition. */
9882 HOST_WIDE_INT rounded_size
;
9883 struct scratch_reg sr
;
9885 get_scratch_register_on_entry (&sr
);
9888 /* Step 1: round SIZE to the previous multiple of the interval. */
9890 rounded_size
= size
& -PROBE_INTERVAL
;
9893 /* Step 2: compute initial and final value of the loop counter. */
9895 /* SP = SP_0 + PROBE_INTERVAL. */
9896 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9897 plus_constant (stack_pointer_rtx
,
9898 - (PROBE_INTERVAL
+ dope
))));
9900 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9901 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
9902 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
9903 gen_rtx_PLUS (Pmode
, sr
.reg
,
9904 stack_pointer_rtx
)));
9909 while (SP != LAST_ADDR)
9911 SP = SP + PROBE_INTERVAL
9915 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9916 values of N from 1 until it is equal to ROUNDED_SIZE. */
9918 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
9921 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9922 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9924 if (size
!= rounded_size
)
9926 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9927 plus_constant (stack_pointer_rtx
,
9928 rounded_size
- size
)));
9929 emit_stack_probe (stack_pointer_rtx
);
9932 /* Adjust back to account for the additional first interval. */
9933 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9934 plus_constant (stack_pointer_rtx
,
9935 PROBE_INTERVAL
+ dope
)));
9937 release_scratch_register_on_entry (&sr
);
9940 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
9941 cfun
->machine
->fs
.sp_offset
+= size
;
9943 /* Make sure nothing is scheduled before we are done. */
9944 emit_insn (gen_blockage ());
9947 /* Adjust the stack pointer up to REG while probing it. */
9950 output_adjust_stack_and_probe (rtx reg
)
9952 static int labelno
= 0;
9953 char loop_lab
[32], end_lab
[32];
9956 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
9957 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
9959 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
9961 /* Jump to END_LAB if SP == LAST_ADDR. */
9962 xops
[0] = stack_pointer_rtx
;
9964 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
9965 fputs ("\tje\t", asm_out_file
);
9966 assemble_name_raw (asm_out_file
, end_lab
);
9967 fputc ('\n', asm_out_file
);
9969 /* SP = SP + PROBE_INTERVAL. */
9970 xops
[1] = GEN_INT (PROBE_INTERVAL
);
9971 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
9974 xops
[1] = const0_rtx
;
9975 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
9977 fprintf (asm_out_file
, "\tjmp\t");
9978 assemble_name_raw (asm_out_file
, loop_lab
);
9979 fputc ('\n', asm_out_file
);
9981 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
9986 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9987 inclusive. These are offsets from the current stack pointer. */
9990 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
9992 /* See if we have a constant small number of probes to generate. If so,
9993 that's the easy case. The run-time loop is made up of 7 insns in the
9994 generic case while the compile-time loop is made up of n insns for n #
9996 if (size
<= 7 * PROBE_INTERVAL
)
10000 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10001 it exceeds SIZE. If only one probe is needed, this will not
10002 generate any code. Then probe at FIRST + SIZE. */
10003 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
10004 emit_stack_probe (plus_constant (stack_pointer_rtx
, -(first
+ i
)));
10006 emit_stack_probe (plus_constant (stack_pointer_rtx
, -(first
+ size
)));
10009 /* Otherwise, do the same as above, but in a loop. Note that we must be
10010 extra careful with variables wrapping around because we might be at
10011 the very top (or the very bottom) of the address space and we have
10012 to be able to handle this case properly; in particular, we use an
10013 equality test for the loop condition. */
10016 HOST_WIDE_INT rounded_size
, last
;
10017 struct scratch_reg sr
;
10019 get_scratch_register_on_entry (&sr
);
10022 /* Step 1: round SIZE to the previous multiple of the interval. */
10024 rounded_size
= size
& -PROBE_INTERVAL
;
10027 /* Step 2: compute initial and final value of the loop counter. */
10029 /* TEST_OFFSET = FIRST. */
10030 emit_move_insn (sr
.reg
, GEN_INT (-first
));
10032 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10033 last
= first
+ rounded_size
;
10036 /* Step 3: the loop
10038 while (TEST_ADDR != LAST_ADDR)
10040 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10044 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10045 until it is equal to ROUNDED_SIZE. */
10047 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
10050 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10051 that SIZE is equal to ROUNDED_SIZE. */
10053 if (size
!= rounded_size
)
10054 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode
,
10057 rounded_size
- size
));
10059 release_scratch_register_on_entry (&sr
);
10062 /* Make sure nothing is scheduled before we are done. */
10063 emit_insn (gen_blockage ());
10066 /* Probe a range of stack addresses from REG to END, inclusive. These are
10067 offsets from the current stack pointer. */
10070 output_probe_stack_range (rtx reg
, rtx end
)
10072 static int labelno
= 0;
10073 char loop_lab
[32], end_lab
[32];
10076 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10077 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10079 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10081 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10084 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10085 fputs ("\tje\t", asm_out_file
);
10086 assemble_name_raw (asm_out_file
, end_lab
);
10087 fputc ('\n', asm_out_file
);
10089 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10090 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10091 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10093 /* Probe at TEST_ADDR. */
10094 xops
[0] = stack_pointer_rtx
;
10096 xops
[2] = const0_rtx
;
10097 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
10099 fprintf (asm_out_file
, "\tjmp\t");
10100 assemble_name_raw (asm_out_file
, loop_lab
);
10101 fputc ('\n', asm_out_file
);
10103 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10108 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10109 to be generated in correct form. */
10111 ix86_finalize_stack_realign_flags (void)
10113 /* Check if stack realign is really needed after reload, and
10114 stores result in cfun */
10115 unsigned int incoming_stack_boundary
10116 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
10117 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
10118 unsigned int stack_realign
= (incoming_stack_boundary
10119 < (current_function_is_leaf
10120 ? crtl
->max_used_stack_slot_alignment
10121 : crtl
->stack_alignment_needed
));
10123 if (crtl
->stack_realign_finalized
)
10125 /* After stack_realign_needed is finalized, we can't no longer
10127 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
10131 crtl
->stack_realign_needed
= stack_realign
;
10132 crtl
->stack_realign_finalized
= true;
10136 /* Expand the prologue into a bunch of separate insns. */
10139 ix86_expand_prologue (void)
10141 struct machine_function
*m
= cfun
->machine
;
10144 struct ix86_frame frame
;
10145 HOST_WIDE_INT allocate
;
10146 bool int_registers_saved
;
10148 ix86_finalize_stack_realign_flags ();
10150 /* DRAP should not coexist with stack_realign_fp */
10151 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10153 memset (&m
->fs
, 0, sizeof (m
->fs
));
10155 /* Initialize CFA state for before the prologue. */
10156 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10157 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10159 /* Track SP offset to the CFA. We continue tracking this after we've
10160 swapped the CFA register away from SP. In the case of re-alignment
10161 this is fudged; we're interested to offsets within the local frame. */
10162 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10163 m
->fs
.sp_valid
= true;
10165 ix86_compute_frame_layout (&frame
);
10167 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10169 /* We should have already generated an error for any use of
10170 ms_hook on a nested function. */
10171 gcc_checking_assert (!ix86_static_chain_on_stack
);
10173 /* Check if profiling is active and we shall use profiling before
10174 prologue variant. If so sorry. */
10175 if (crtl
->profile
&& flag_fentry
!= 0)
10176 sorry ("ms_hook_prologue attribute isn%'t compatible "
10177 "with -mfentry for 32-bit");
10179 /* In ix86_asm_output_function_label we emitted:
10180 8b ff movl.s %edi,%edi
10182 8b ec movl.s %esp,%ebp
10184 This matches the hookable function prologue in Win32 API
10185 functions in Microsoft Windows XP Service Pack 2 and newer.
10186 Wine uses this to enable Windows apps to hook the Win32 API
10187 functions provided by Wine.
10189 What that means is that we've already set up the frame pointer. */
10191 if (frame_pointer_needed
10192 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10196 /* We've decided to use the frame pointer already set up.
10197 Describe this to the unwinder by pretending that both
10198 push and mov insns happen right here.
10200 Putting the unwind info here at the end of the ms_hook
10201 is done so that we can make absolutely certain we get
10202 the required byte sequence at the start of the function,
10203 rather than relying on an assembler that can produce
10204 the exact encoding required.
10206 However it does mean (in the unpatched case) that we have
10207 a 1 insn window where the asynchronous unwind info is
10208 incorrect. However, if we placed the unwind info at
10209 its correct location we would have incorrect unwind info
10210 in the patched case. Which is probably all moot since
10211 I don't expect Wine generates dwarf2 unwind info for the
10212 system libraries that use this feature. */
10214 insn
= emit_insn (gen_blockage ());
10216 push
= gen_push (hard_frame_pointer_rtx
);
10217 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10218 stack_pointer_rtx
);
10219 RTX_FRAME_RELATED_P (push
) = 1;
10220 RTX_FRAME_RELATED_P (mov
) = 1;
10222 RTX_FRAME_RELATED_P (insn
) = 1;
10223 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10224 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10226 /* Note that gen_push incremented m->fs.cfa_offset, even
10227 though we didn't emit the push insn here. */
10228 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10229 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10230 m
->fs
.fp_valid
= true;
10234 /* The frame pointer is not needed so pop %ebp again.
10235 This leaves us with a pristine state. */
10236 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10240 /* The first insn of a function that accepts its static chain on the
10241 stack is to push the register that would be filled in by a direct
10242 call. This insn will be skipped by the trampoline. */
10243 else if (ix86_static_chain_on_stack
)
10245 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10246 emit_insn (gen_blockage ());
10248 /* We don't want to interpret this push insn as a register save,
10249 only as a stack adjustment. The real copy of the register as
10250 a save will be done later, if needed. */
10251 t
= plus_constant (stack_pointer_rtx
, -UNITS_PER_WORD
);
10252 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10253 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10254 RTX_FRAME_RELATED_P (insn
) = 1;
10257 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10258 of DRAP is needed and stack realignment is really needed after reload */
10259 if (stack_realign_drap
)
10261 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10263 /* Only need to push parameter pointer reg if it is caller saved. */
10264 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10266 /* Push arg pointer reg */
10267 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10268 RTX_FRAME_RELATED_P (insn
) = 1;
10271 /* Grab the argument pointer. */
10272 t
= plus_constant (stack_pointer_rtx
, m
->fs
.sp_offset
);
10273 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10274 RTX_FRAME_RELATED_P (insn
) = 1;
10275 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10276 m
->fs
.cfa_offset
= 0;
10278 /* Align the stack. */
10279 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10281 GEN_INT (-align_bytes
)));
10282 RTX_FRAME_RELATED_P (insn
) = 1;
10284 /* Replicate the return address on the stack so that return
10285 address can be reached via (argp - 1) slot. This is needed
10286 to implement macro RETURN_ADDR_RTX and intrinsic function
10287 expand_builtin_return_addr etc. */
10288 t
= plus_constant (crtl
->drap_reg
, -UNITS_PER_WORD
);
10289 t
= gen_frame_mem (Pmode
, t
);
10290 insn
= emit_insn (gen_push (t
));
10291 RTX_FRAME_RELATED_P (insn
) = 1;
10293 /* For the purposes of frame and register save area addressing,
10294 we've started over with a new frame. */
10295 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10296 m
->fs
.realigned
= true;
10299 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10301 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10302 slower on all targets. Also sdb doesn't like it. */
10303 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10304 RTX_FRAME_RELATED_P (insn
) = 1;
10306 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10308 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10309 RTX_FRAME_RELATED_P (insn
) = 1;
10311 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10312 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10313 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10314 m
->fs
.fp_valid
= true;
10318 int_registers_saved
= (frame
.nregs
== 0);
10320 if (!int_registers_saved
)
10322 /* If saving registers via PUSH, do so now. */
10323 if (!frame
.save_regs_using_mov
)
10325 ix86_emit_save_regs ();
10326 int_registers_saved
= true;
10327 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10330 /* When using red zone we may start register saving before allocating
10331 the stack frame saving one cycle of the prologue. However, avoid
10332 doing this if we have to probe the stack; at least on x86_64 the
10333 stack probe can turn into a call that clobbers a red zone location. */
10334 else if (ix86_using_red_zone ()
10335 && (! TARGET_STACK_PROBE
10336 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10338 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10339 int_registers_saved
= true;
10343 if (stack_realign_fp
)
10345 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10346 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10348 /* The computation of the size of the re-aligned stack frame means
10349 that we must allocate the size of the register save area before
10350 performing the actual alignment. Otherwise we cannot guarantee
10351 that there's enough storage above the realignment point. */
10352 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10353 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10354 GEN_INT (m
->fs
.sp_offset
10355 - frame
.sse_reg_save_offset
),
10358 /* Align the stack. */
10359 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10361 GEN_INT (-align_bytes
)));
10363 /* For the purposes of register save area addressing, the stack
10364 pointer is no longer valid. As for the value of sp_offset,
10365 see ix86_compute_frame_layout, which we need to match in order
10366 to pass verification of stack_pointer_offset at the end. */
10367 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10368 m
->fs
.sp_valid
= false;
10371 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10373 if (flag_stack_usage
)
10375 /* We start to count from ARG_POINTER. */
10376 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10378 /* If it was realigned, take into account the fake frame. */
10379 if (stack_realign_drap
)
10381 if (ix86_static_chain_on_stack
)
10382 stack_size
+= UNITS_PER_WORD
;
10384 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10385 stack_size
+= UNITS_PER_WORD
;
10387 /* This over-estimates by 1 minimal-stack-alignment-unit but
10388 mitigates that by counting in the new return address slot. */
10389 current_function_dynamic_stack_size
10390 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10393 current_function_static_stack_size
= stack_size
;
10396 /* The stack has already been decremented by the instruction calling us
10397 so we need to probe unconditionally to preserve the protection area. */
10398 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10400 /* We expect the registers to be saved when probes are used. */
10401 gcc_assert (int_registers_saved
);
10403 if (STACK_CHECK_MOVING_SP
)
10405 ix86_adjust_stack_and_probe (allocate
);
10410 HOST_WIDE_INT size
= allocate
;
10412 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10413 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10415 if (TARGET_STACK_PROBE
)
10416 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10418 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10424 else if (!ix86_target_stack_probe ()
10425 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10427 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10428 GEN_INT (-allocate
), -1,
10429 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10433 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10435 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10437 bool eax_live
= false;
10438 bool r10_live
= false;
10441 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10442 if (!TARGET_64BIT_MS_ABI
)
10443 eax_live
= ix86_eax_live_at_start_p ();
10447 emit_insn (gen_push (eax
));
10448 allocate
-= UNITS_PER_WORD
;
10452 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10453 emit_insn (gen_push (r10
));
10454 allocate
-= UNITS_PER_WORD
;
10457 emit_move_insn (eax
, GEN_INT (allocate
));
10458 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10460 /* Use the fact that AX still contains ALLOCATE. */
10461 adjust_stack_insn
= (TARGET_64BIT
10462 ? gen_pro_epilogue_adjust_stack_di_sub
10463 : gen_pro_epilogue_adjust_stack_si_sub
);
10465 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10466 stack_pointer_rtx
, eax
));
10468 /* Note that SEH directives need to continue tracking the stack
10469 pointer even after the frame pointer has been set up. */
10470 if (m
->fs
.cfa_reg
== stack_pointer_rtx
|| TARGET_SEH
)
10472 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10473 m
->fs
.cfa_offset
+= allocate
;
10475 RTX_FRAME_RELATED_P (insn
) = 1;
10476 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10477 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10478 plus_constant (stack_pointer_rtx
,
10481 m
->fs
.sp_offset
+= allocate
;
10483 if (r10_live
&& eax_live
)
10485 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10486 emit_move_insn (r10
, gen_frame_mem (Pmode
, t
));
10487 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10488 emit_move_insn (eax
, gen_frame_mem (Pmode
, t
));
10490 else if (eax_live
|| r10_live
)
10492 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10493 emit_move_insn ((eax_live
? eax
: r10
), gen_frame_mem (Pmode
, t
));
10496 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10498 /* If we haven't already set up the frame pointer, do so now. */
10499 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10501 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10502 GEN_INT (frame
.stack_pointer_offset
10503 - frame
.hard_frame_pointer_offset
));
10504 insn
= emit_insn (insn
);
10505 RTX_FRAME_RELATED_P (insn
) = 1;
10506 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10508 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10509 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10510 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10511 m
->fs
.fp_valid
= true;
10514 if (!int_registers_saved
)
10515 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10516 if (frame
.nsseregs
)
10517 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10519 pic_reg_used
= false;
10520 if (pic_offset_table_rtx
10521 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
10524 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
10526 if (alt_pic_reg_used
!= INVALID_REGNUM
)
10527 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
10529 pic_reg_used
= true;
10536 if (ix86_cmodel
== CM_LARGE_PIC
)
10538 rtx tmp_reg
= gen_rtx_REG (DImode
, R11_REG
);
10539 rtx label
= gen_label_rtx ();
10540 emit_label (label
);
10541 LABEL_PRESERVE_P (label
) = 1;
10542 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
10543 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
, label
));
10544 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
10545 insn
= emit_insn (gen_adddi3 (pic_offset_table_rtx
,
10546 pic_offset_table_rtx
, tmp_reg
));
10549 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
10552 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
10555 /* In the pic_reg_used case, make sure that the got load isn't deleted
10556 when mcount needs it. Blockage to avoid call movement across mcount
10557 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10559 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
10560 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
10562 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
10564 /* vDRAP is setup but after reload it turns out stack realign
10565 isn't necessary, here we will emit prologue to setup DRAP
10566 without stack realign adjustment */
10567 t
= choose_baseaddr (0);
10568 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10571 /* Prevent instructions from being scheduled into register save push
10572 sequence when access to the redzone area is done through frame pointer.
10573 The offset between the frame pointer and the stack pointer is calculated
10574 relative to the value of the stack pointer at the end of the function
10575 prologue, and moving instructions that access redzone area via frame
10576 pointer inside push sequence violates this assumption. */
10577 if (frame_pointer_needed
&& frame
.red_zone_size
)
10578 emit_insn (gen_memory_blockage ());
10580 /* Emit cld instruction if stringops are used in the function. */
10581 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
10582 emit_insn (gen_cld ());
10584 /* SEH requires that the prologue end within 256 bytes of the start of
10585 the function. Prevent instruction schedules that would extend that. */
10587 emit_insn (gen_blockage ());
10590 /* Emit code to restore REG using a POP insn. */
10593 ix86_emit_restore_reg_using_pop (rtx reg
)
10595 struct machine_function
*m
= cfun
->machine
;
10596 rtx insn
= emit_insn (gen_pop (reg
));
10598 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
10599 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10601 if (m
->fs
.cfa_reg
== crtl
->drap_reg
10602 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
10604 /* Previously we'd represented the CFA as an expression
10605 like *(%ebp - 8). We've just popped that value from
10606 the stack, which means we need to reset the CFA to
10607 the drap register. This will remain until we restore
10608 the stack pointer. */
10609 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10610 RTX_FRAME_RELATED_P (insn
) = 1;
10612 /* This means that the DRAP register is valid for addressing too. */
10613 m
->fs
.drap_valid
= true;
10617 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10619 rtx x
= plus_constant (stack_pointer_rtx
, UNITS_PER_WORD
);
10620 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10621 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
10622 RTX_FRAME_RELATED_P (insn
) = 1;
10624 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10627 /* When the frame pointer is the CFA, and we pop it, we are
10628 swapping back to the stack pointer as the CFA. This happens
10629 for stack frames that don't allocate other data, so we assume
10630 the stack pointer is now pointing at the return address, i.e.
10631 the function entry state, which makes the offset be 1 word. */
10632 if (reg
== hard_frame_pointer_rtx
)
10634 m
->fs
.fp_valid
= false;
10635 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10637 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10638 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10640 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10641 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10642 GEN_INT (m
->fs
.cfa_offset
)));
10643 RTX_FRAME_RELATED_P (insn
) = 1;
10648 /* Emit code to restore saved registers using POP insns. */
10651 ix86_emit_restore_regs_using_pop (void)
10653 unsigned int regno
;
10655 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10656 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
10657 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode
, regno
));
10660 /* Emit code and notes for the LEAVE instruction. */
10663 ix86_emit_leave (void)
10665 struct machine_function
*m
= cfun
->machine
;
10666 rtx insn
= emit_insn (ix86_gen_leave ());
10668 ix86_add_queued_cfa_restore_notes (insn
);
10670 gcc_assert (m
->fs
.fp_valid
);
10671 m
->fs
.sp_valid
= true;
10672 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
10673 m
->fs
.fp_valid
= false;
10675 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10677 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10678 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
10680 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10681 plus_constant (stack_pointer_rtx
, m
->fs
.sp_offset
));
10682 RTX_FRAME_RELATED_P (insn
) = 1;
10683 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
10688 /* Emit code to restore saved registers using MOV insns.
10689 First register is restored from CFA - CFA_OFFSET. */
10691 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10692 int maybe_eh_return
)
10694 struct machine_function
*m
= cfun
->machine
;
10695 unsigned int regno
;
10697 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10698 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10700 rtx reg
= gen_rtx_REG (Pmode
, regno
);
10703 mem
= choose_baseaddr (cfa_offset
);
10704 mem
= gen_frame_mem (Pmode
, mem
);
10705 insn
= emit_move_insn (reg
, mem
);
10707 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
10709 /* Previously we'd represented the CFA as an expression
10710 like *(%ebp - 8). We've just popped that value from
10711 the stack, which means we need to reset the CFA to
10712 the drap register. This will remain until we restore
10713 the stack pointer. */
10714 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10715 RTX_FRAME_RELATED_P (insn
) = 1;
10717 /* This means that the DRAP register is valid for addressing. */
10718 m
->fs
.drap_valid
= true;
10721 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10723 cfa_offset
-= UNITS_PER_WORD
;
10727 /* Emit code to restore saved registers using MOV insns.
10728 First register is restored from CFA - CFA_OFFSET. */
10730 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10731 int maybe_eh_return
)
10733 unsigned int regno
;
10735 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10736 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10738 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
10741 mem
= choose_baseaddr (cfa_offset
);
10742 mem
= gen_rtx_MEM (V4SFmode
, mem
);
10743 set_mem_align (mem
, 128);
10744 emit_move_insn (reg
, mem
);
10746 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10752 /* Restore function stack, frame, and registers. */
10755 ix86_expand_epilogue (int style
)
10757 struct machine_function
*m
= cfun
->machine
;
10758 struct machine_frame_state frame_state_save
= m
->fs
;
10759 struct ix86_frame frame
;
10760 bool restore_regs_via_mov
;
10763 ix86_finalize_stack_realign_flags ();
10764 ix86_compute_frame_layout (&frame
);
10766 m
->fs
.sp_valid
= (!frame_pointer_needed
10767 || (current_function_sp_is_unchanging
10768 && !stack_realign_fp
));
10769 gcc_assert (!m
->fs
.sp_valid
10770 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10772 /* The FP must be valid if the frame pointer is present. */
10773 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
10774 gcc_assert (!m
->fs
.fp_valid
10775 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
10777 /* We must have *some* valid pointer to the stack frame. */
10778 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
10780 /* The DRAP is never valid at this point. */
10781 gcc_assert (!m
->fs
.drap_valid
);
10783 /* See the comment about red zone and frame
10784 pointer usage in ix86_expand_prologue. */
10785 if (frame_pointer_needed
&& frame
.red_zone_size
)
10786 emit_insn (gen_memory_blockage ());
10788 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
10789 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
10791 /* Determine the CFA offset of the end of the red-zone. */
10792 m
->fs
.red_zone_offset
= 0;
10793 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
10795 /* The red-zone begins below the return address. */
10796 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
10798 /* When the register save area is in the aligned portion of
10799 the stack, determine the maximum runtime displacement that
10800 matches up with the aligned frame. */
10801 if (stack_realign_drap
)
10802 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
10806 /* Special care must be taken for the normal return case of a function
10807 using eh_return: the eax and edx registers are marked as saved, but
10808 not restored along this path. Adjust the save location to match. */
10809 if (crtl
->calls_eh_return
&& style
!= 2)
10810 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
10812 /* EH_RETURN requires the use of moves to function properly. */
10813 if (crtl
->calls_eh_return
)
10814 restore_regs_via_mov
= true;
10815 /* SEH requires the use of pops to identify the epilogue. */
10816 else if (TARGET_SEH
)
10817 restore_regs_via_mov
= false;
10818 /* If we're only restoring one register and sp is not valid then
10819 using a move instruction to restore the register since it's
10820 less work than reloading sp and popping the register. */
10821 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
10822 restore_regs_via_mov
= true;
10823 else if (TARGET_EPILOGUE_USING_MOVE
10824 && cfun
->machine
->use_fast_prologue_epilogue
10825 && (frame
.nregs
> 1
10826 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
10827 restore_regs_via_mov
= true;
10828 else if (frame_pointer_needed
10830 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
10831 restore_regs_via_mov
= true;
10832 else if (frame_pointer_needed
10833 && TARGET_USE_LEAVE
10834 && cfun
->machine
->use_fast_prologue_epilogue
10835 && frame
.nregs
== 1)
10836 restore_regs_via_mov
= true;
10838 restore_regs_via_mov
= false;
10840 if (restore_regs_via_mov
|| frame
.nsseregs
)
10842 /* Ensure that the entire register save area is addressable via
10843 the stack pointer, if we will restore via sp. */
10845 && m
->fs
.sp_offset
> 0x7fffffff
10846 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
10847 && (frame
.nsseregs
+ frame
.nregs
) != 0)
10849 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10850 GEN_INT (m
->fs
.sp_offset
10851 - frame
.sse_reg_save_offset
),
10853 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10857 /* If there are any SSE registers to restore, then we have to do it
10858 via moves, since there's obviously no pop for SSE regs. */
10859 if (frame
.nsseregs
)
10860 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
10863 if (restore_regs_via_mov
)
10868 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
10870 /* eh_return epilogues need %ecx added to the stack pointer. */
10873 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
10875 /* Stack align doesn't work with eh_return. */
10876 gcc_assert (!stack_realign_drap
);
10877 /* Neither does regparm nested functions. */
10878 gcc_assert (!ix86_static_chain_on_stack
);
10880 if (frame_pointer_needed
)
10882 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
10883 t
= plus_constant (t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
10884 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
10886 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
10887 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
10889 /* Note that we use SA as a temporary CFA, as the return
10890 address is at the proper place relative to it. We
10891 pretend this happens at the FP restore insn because
10892 prior to this insn the FP would be stored at the wrong
10893 offset relative to SA, and after this insn we have no
10894 other reasonable register to use for the CFA. We don't
10895 bother resetting the CFA to the SP for the duration of
10896 the return insn. */
10897 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10898 plus_constant (sa
, UNITS_PER_WORD
));
10899 ix86_add_queued_cfa_restore_notes (insn
);
10900 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
10901 RTX_FRAME_RELATED_P (insn
) = 1;
10903 m
->fs
.cfa_reg
= sa
;
10904 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10905 m
->fs
.fp_valid
= false;
10907 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
10908 const0_rtx
, style
, false);
10912 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
10913 t
= plus_constant (t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
10914 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
10915 ix86_add_queued_cfa_restore_notes (insn
);
10917 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10918 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
10920 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10921 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10922 plus_constant (stack_pointer_rtx
,
10924 RTX_FRAME_RELATED_P (insn
) = 1;
10927 m
->fs
.sp_offset
= UNITS_PER_WORD
;
10928 m
->fs
.sp_valid
= true;
10933 /* SEH requires that the function end with (1) a stack adjustment
10934 if necessary, (2) a sequence of pops, and (3) a return or
10935 jump instruction. Prevent insns from the function body from
10936 being scheduled into this sequence. */
10939 /* Prevent a catch region from being adjacent to the standard
10940 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
10941 several other flags that would be interesting to test are
10943 if (flag_non_call_exceptions
)
10944 emit_insn (gen_nops (const1_rtx
));
10946 emit_insn (gen_blockage ());
10949 /* First step is to deallocate the stack frame so that we can
10950 pop the registers. */
10951 if (!m
->fs
.sp_valid
)
10953 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
10954 GEN_INT (m
->fs
.fp_offset
10955 - frame
.reg_save_offset
),
10958 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
10960 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10961 GEN_INT (m
->fs
.sp_offset
10962 - frame
.reg_save_offset
),
10964 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10967 ix86_emit_restore_regs_using_pop ();
10970 /* If we used a stack pointer and haven't already got rid of it,
10972 if (m
->fs
.fp_valid
)
10974 /* If the stack pointer is valid and pointing at the frame
10975 pointer store address, then we only need a pop. */
10976 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
10977 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
10978 /* Leave results in shorter dependency chains on CPUs that are
10979 able to grok it fast. */
10980 else if (TARGET_USE_LEAVE
10981 || optimize_function_for_size_p (cfun
)
10982 || !cfun
->machine
->use_fast_prologue_epilogue
)
10983 ix86_emit_leave ();
10986 pro_epilogue_adjust_stack (stack_pointer_rtx
,
10987 hard_frame_pointer_rtx
,
10988 const0_rtx
, style
, !using_drap
);
10989 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
10995 int param_ptr_offset
= UNITS_PER_WORD
;
10998 gcc_assert (stack_realign_drap
);
11000 if (ix86_static_chain_on_stack
)
11001 param_ptr_offset
+= UNITS_PER_WORD
;
11002 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11003 param_ptr_offset
+= UNITS_PER_WORD
;
11005 insn
= emit_insn (gen_rtx_SET
11006 (VOIDmode
, stack_pointer_rtx
,
11007 gen_rtx_PLUS (Pmode
,
11009 GEN_INT (-param_ptr_offset
))));
11010 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11011 m
->fs
.cfa_offset
= param_ptr_offset
;
11012 m
->fs
.sp_offset
= param_ptr_offset
;
11013 m
->fs
.realigned
= false;
11015 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11016 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11017 GEN_INT (param_ptr_offset
)));
11018 RTX_FRAME_RELATED_P (insn
) = 1;
11020 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11021 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
11024 /* At this point the stack pointer must be valid, and we must have
11025 restored all of the registers. We may not have deallocated the
11026 entire stack frame. We've delayed this until now because it may
11027 be possible to merge the local stack deallocation with the
11028 deallocation forced by ix86_static_chain_on_stack. */
11029 gcc_assert (m
->fs
.sp_valid
);
11030 gcc_assert (!m
->fs
.fp_valid
);
11031 gcc_assert (!m
->fs
.realigned
);
11032 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
11034 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11035 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
11039 /* Sibcall epilogues don't want a return instruction. */
11042 m
->fs
= frame_state_save
;
11046 /* Emit vzeroupper if needed. */
11047 if (TARGET_VZEROUPPER
11048 && !TREE_THIS_VOLATILE (cfun
->decl
)
11049 && !cfun
->machine
->caller_return_avx256_p
)
11050 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256
)));
11052 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
11054 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
11056 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11057 address, do explicit add, and jump indirectly to the caller. */
11059 if (crtl
->args
.pops_args
>= 65536)
11061 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
11064 /* There is no "pascal" calling convention in any 64bit ABI. */
11065 gcc_assert (!TARGET_64BIT
);
11067 insn
= emit_insn (gen_pop (ecx
));
11068 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11069 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11071 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
11072 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
11073 add_reg_note (insn
, REG_CFA_REGISTER
,
11074 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
11075 RTX_FRAME_RELATED_P (insn
) = 1;
11077 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11079 emit_jump_insn (gen_return_indirect_internal (ecx
));
11082 emit_jump_insn (gen_return_pop_internal (popc
));
11085 emit_jump_insn (gen_return_internal ());
11087 /* Restore the state back to the state from the prologue,
11088 so that it's correct for the next epilogue. */
11089 m
->fs
= frame_state_save
;
11092 /* Reset from the function's potential modifications. */
11095 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11096 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
11098 if (pic_offset_table_rtx
)
11099 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
11101 /* Mach-O doesn't support labels at the end of objects, so if
11102 it looks like we might want one, insert a NOP. */
11104 rtx insn
= get_last_insn ();
11107 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
11108 insn
= PREV_INSN (insn
);
11112 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
11113 fputs ("\tnop\n", file
);
11119 /* Return a scratch register to use in the split stack prologue. The
11120 split stack prologue is used for -fsplit-stack. It is the first
11121 instructions in the function, even before the regular prologue.
11122 The scratch register can be any caller-saved register which is not
11123 used for parameters or for the static chain. */
11125 static unsigned int
11126 split_stack_prologue_scratch_regno (void)
11135 is_fastcall
= (lookup_attribute ("fastcall",
11136 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11138 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
11142 if (DECL_STATIC_CHAIN (cfun
->decl
))
11144 sorry ("-fsplit-stack does not support fastcall with "
11145 "nested function");
11146 return INVALID_REGNUM
;
11150 else if (regparm
< 3)
11152 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11158 sorry ("-fsplit-stack does not support 2 register "
11159 " parameters for a nested function");
11160 return INVALID_REGNUM
;
11167 /* FIXME: We could make this work by pushing a register
11168 around the addition and comparison. */
11169 sorry ("-fsplit-stack does not support 3 register parameters");
11170 return INVALID_REGNUM
;
11175 /* A SYMBOL_REF for the function which allocates new stackspace for
11178 static GTY(()) rtx split_stack_fn
;
11180 /* A SYMBOL_REF for the more stack function when using the large
11183 static GTY(()) rtx split_stack_fn_large
;
11185 /* Handle -fsplit-stack. These are the first instructions in the
11186 function, even before the regular prologue. */
11189 ix86_expand_split_stack_prologue (void)
11191 struct ix86_frame frame
;
11192 HOST_WIDE_INT allocate
;
11193 unsigned HOST_WIDE_INT args_size
;
11194 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11195 rtx scratch_reg
= NULL_RTX
;
11196 rtx varargs_label
= NULL_RTX
;
11199 gcc_assert (flag_split_stack
&& reload_completed
);
11201 ix86_finalize_stack_realign_flags ();
11202 ix86_compute_frame_layout (&frame
);
11203 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11205 /* This is the label we will branch to if we have enough stack
11206 space. We expect the basic block reordering pass to reverse this
11207 branch if optimizing, so that we branch in the unlikely case. */
11208 label
= gen_label_rtx ();
11210 /* We need to compare the stack pointer minus the frame size with
11211 the stack boundary in the TCB. The stack boundary always gives
11212 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11213 can compare directly. Otherwise we need to do an addition. */
11215 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11216 UNSPEC_STACK_CHECK
);
11217 limit
= gen_rtx_CONST (Pmode
, limit
);
11218 limit
= gen_rtx_MEM (Pmode
, limit
);
11219 if (allocate
< SPLIT_STACK_AVAILABLE
)
11220 current
= stack_pointer_rtx
;
11223 unsigned int scratch_regno
;
11226 /* We need a scratch register to hold the stack pointer minus
11227 the required frame size. Since this is the very start of the
11228 function, the scratch register can be any caller-saved
11229 register which is not used for parameters. */
11230 offset
= GEN_INT (- allocate
);
11231 scratch_regno
= split_stack_prologue_scratch_regno ();
11232 if (scratch_regno
== INVALID_REGNUM
)
11234 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11235 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11237 /* We don't use ix86_gen_add3 in this case because it will
11238 want to split to lea, but when not optimizing the insn
11239 will not be split after this point. */
11240 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11241 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11246 emit_move_insn (scratch_reg
, offset
);
11247 emit_insn (gen_adddi3 (scratch_reg
, scratch_reg
,
11248 stack_pointer_rtx
));
11250 current
= scratch_reg
;
11253 ix86_expand_branch (GEU
, current
, limit
, label
);
11254 jump_insn
= get_last_insn ();
11255 JUMP_LABEL (jump_insn
) = label
;
11257 /* Mark the jump as very likely to be taken. */
11258 add_reg_note (jump_insn
, REG_BR_PROB
,
11259 GEN_INT (REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100));
11261 if (split_stack_fn
== NULL_RTX
)
11262 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11263 fn
= split_stack_fn
;
11265 /* Get more stack space. We pass in the desired stack space and the
11266 size of the arguments to copy to the new stack. In 32-bit mode
11267 we push the parameters; __morestack will return on a new stack
11268 anyhow. In 64-bit mode we pass the parameters in r10 and
11270 allocate_rtx
= GEN_INT (allocate
);
11271 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11272 call_fusage
= NULL_RTX
;
11277 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11278 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11280 /* If this function uses a static chain, it will be in %r10.
11281 Preserve it across the call to __morestack. */
11282 if (DECL_STATIC_CHAIN (cfun
->decl
))
11286 rax
= gen_rtx_REG (Pmode
, AX_REG
);
11287 emit_move_insn (rax
, reg10
);
11288 use_reg (&call_fusage
, rax
);
11291 if (ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11293 HOST_WIDE_INT argval
;
11295 /* When using the large model we need to load the address
11296 into a register, and we've run out of registers. So we
11297 switch to a different calling convention, and we call a
11298 different function: __morestack_large. We pass the
11299 argument size in the upper 32 bits of r10 and pass the
11300 frame size in the lower 32 bits. */
11301 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11302 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11304 if (split_stack_fn_large
== NULL_RTX
)
11305 split_stack_fn_large
=
11306 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11308 if (ix86_cmodel
== CM_LARGE_PIC
)
11312 label
= gen_label_rtx ();
11313 emit_label (label
);
11314 LABEL_PRESERVE_P (label
) = 1;
11315 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11316 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11317 emit_insn (gen_adddi3 (reg10
, reg10
, reg11
));
11318 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11320 x
= gen_rtx_CONST (Pmode
, x
);
11321 emit_move_insn (reg11
, x
);
11322 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11323 x
= gen_const_mem (Pmode
, x
);
11324 emit_move_insn (reg11
, x
);
11327 emit_move_insn (reg11
, split_stack_fn_large
);
11331 argval
= ((args_size
<< 16) << 16) + allocate
;
11332 emit_move_insn (reg10
, GEN_INT (argval
));
11336 emit_move_insn (reg10
, allocate_rtx
);
11337 emit_move_insn (reg11
, GEN_INT (args_size
));
11338 use_reg (&call_fusage
, reg11
);
11341 use_reg (&call_fusage
, reg10
);
11345 emit_insn (gen_push (GEN_INT (args_size
)));
11346 emit_insn (gen_push (allocate_rtx
));
11348 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11349 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11351 add_function_usage_to (call_insn
, call_fusage
);
11353 /* In order to make call/return prediction work right, we now need
11354 to execute a return instruction. See
11355 libgcc/config/i386/morestack.S for the details on how this works.
11357 For flow purposes gcc must not see this as a return
11358 instruction--we need control flow to continue at the subsequent
11359 label. Therefore, we use an unspec. */
11360 gcc_assert (crtl
->args
.pops_args
< 65536);
11361 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11363 /* If we are in 64-bit mode and this function uses a static chain,
11364 we saved %r10 in %rax before calling _morestack. */
11365 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11366 emit_move_insn (gen_rtx_REG (Pmode
, R10_REG
),
11367 gen_rtx_REG (Pmode
, AX_REG
));
11369 /* If this function calls va_start, we need to store a pointer to
11370 the arguments on the old stack, because they may not have been
11371 all copied to the new stack. At this point the old stack can be
11372 found at the frame pointer value used by __morestack, because
11373 __morestack has set that up before calling back to us. Here we
11374 store that pointer in a scratch register, and in
11375 ix86_expand_prologue we store the scratch register in a stack
11377 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11379 unsigned int scratch_regno
;
11383 scratch_regno
= split_stack_prologue_scratch_regno ();
11384 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11385 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
11389 return address within this function
11390 return address of caller of this function
11392 So we add three words to get to the stack arguments.
11396 return address within this function
11397 first argument to __morestack
11398 second argument to __morestack
11399 return address of caller of this function
11401 So we add five words to get to the stack arguments.
11403 words
= TARGET_64BIT
? 3 : 5;
11404 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11405 gen_rtx_PLUS (Pmode
, frame_reg
,
11406 GEN_INT (words
* UNITS_PER_WORD
))));
11408 varargs_label
= gen_label_rtx ();
11409 emit_jump_insn (gen_jump (varargs_label
));
11410 JUMP_LABEL (get_last_insn ()) = varargs_label
;
11415 emit_label (label
);
11416 LABEL_NUSES (label
) = 1;
11418 /* If this function calls va_start, we now have to set the scratch
11419 register for the case where we do not call __morestack. In this
11420 case we need to set it based on the stack pointer. */
11421 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11423 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11424 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11425 GEN_INT (UNITS_PER_WORD
))));
11427 emit_label (varargs_label
);
11428 LABEL_NUSES (varargs_label
) = 1;
11432 /* We may have to tell the dataflow pass that the split stack prologue
11433 is initializing a scratch register. */
11436 ix86_live_on_entry (bitmap regs
)
11438 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11440 gcc_assert (flag_split_stack
);
11441 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11445 /* Extract the parts of an RTL expression that is a valid memory address
11446 for an instruction. Return 0 if the structure of the address is
11447 grossly off. Return -1 if the address contains ASHIFT, so it is not
11448 strictly valid, but still used for computing length of lea instruction. */
11451 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11453 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11454 rtx base_reg
, index_reg
;
11455 HOST_WIDE_INT scale
= 1;
11456 rtx scale_rtx
= NULL_RTX
;
11459 enum ix86_address_seg seg
= SEG_DEFAULT
;
11461 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
11463 else if (GET_CODE (addr
) == PLUS
)
11465 rtx addends
[4], op
;
11473 addends
[n
++] = XEXP (op
, 1);
11476 while (GET_CODE (op
) == PLUS
);
11481 for (i
= n
; i
>= 0; --i
)
11484 switch (GET_CODE (op
))
11489 index
= XEXP (op
, 0);
11490 scale_rtx
= XEXP (op
, 1);
11496 index
= XEXP (op
, 0);
11497 tmp
= XEXP (op
, 1);
11498 if (!CONST_INT_P (tmp
))
11500 scale
= INTVAL (tmp
);
11501 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11503 scale
= 1 << scale
;
11507 if (XINT (op
, 1) == UNSPEC_TP
11508 && TARGET_TLS_DIRECT_SEG_REFS
11509 && seg
== SEG_DEFAULT
)
11510 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
11539 else if (GET_CODE (addr
) == MULT
)
11541 index
= XEXP (addr
, 0); /* index*scale */
11542 scale_rtx
= XEXP (addr
, 1);
11544 else if (GET_CODE (addr
) == ASHIFT
)
11546 /* We're called for lea too, which implements ashift on occasion. */
11547 index
= XEXP (addr
, 0);
11548 tmp
= XEXP (addr
, 1);
11549 if (!CONST_INT_P (tmp
))
11551 scale
= INTVAL (tmp
);
11552 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11554 scale
= 1 << scale
;
11558 disp
= addr
; /* displacement */
11560 /* Extract the integral value of scale. */
11563 if (!CONST_INT_P (scale_rtx
))
11565 scale
= INTVAL (scale_rtx
);
11568 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
11569 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
11571 /* Avoid useless 0 displacement. */
11572 if (disp
== const0_rtx
&& (base
|| index
))
11575 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11576 if (base_reg
&& index_reg
&& scale
== 1
11577 && (index_reg
== arg_pointer_rtx
11578 || index_reg
== frame_pointer_rtx
11579 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
11582 tmp
= base
, base
= index
, index
= tmp
;
11583 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
11586 /* Special case: %ebp cannot be encoded as a base without a displacement.
11590 && (base_reg
== hard_frame_pointer_rtx
11591 || base_reg
== frame_pointer_rtx
11592 || base_reg
== arg_pointer_rtx
11593 || (REG_P (base_reg
)
11594 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
11595 || REGNO (base_reg
) == R13_REG
))))
11598 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11599 Avoid this by transforming to [%esi+0].
11600 Reload calls address legitimization without cfun defined, so we need
11601 to test cfun for being non-NULL. */
11602 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
11603 && base_reg
&& !index_reg
&& !disp
11604 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
11607 /* Special case: encode reg+reg instead of reg*2. */
11608 if (!base
&& index
&& scale
== 2)
11609 base
= index
, base_reg
= index_reg
, scale
= 1;
11611 /* Special case: scaling cannot be encoded without base or displacement. */
11612 if (!base
&& !disp
&& index
&& scale
!= 1)
11616 out
->index
= index
;
11618 out
->scale
= scale
;
11624 /* Return cost of the memory address x.
11625 For i386, it is better to use a complex address than let gcc copy
11626 the address into a reg and make a new pseudo. But not if the address
11627 requires to two regs - that would mean more pseudos with longer
11630 ix86_address_cost (rtx x
, bool speed ATTRIBUTE_UNUSED
)
11632 struct ix86_address parts
;
11634 int ok
= ix86_decompose_address (x
, &parts
);
11638 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
11639 parts
.base
= SUBREG_REG (parts
.base
);
11640 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
11641 parts
.index
= SUBREG_REG (parts
.index
);
11643 /* Attempt to minimize number of registers in the address. */
11645 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
11647 && (!REG_P (parts
.index
)
11648 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
11652 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
11654 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
11655 && parts
.base
!= parts
.index
)
11658 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
11659 since it's predecode logic can't detect the length of instructions
11660 and it degenerates to vector decoded. Increase cost of such
11661 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11662 to split such addresses or even refuse such addresses at all.
11664 Following addressing modes are affected:
11669 The first and last case may be avoidable by explicitly coding the zero in
11670 memory address, but I don't have AMD-K6 machine handy to check this
11674 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11675 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11676 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
11682 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11683 this is used for to form addresses to local data when -fPIC is in
11687 darwin_local_data_pic (rtx disp
)
11689 return (GET_CODE (disp
) == UNSPEC
11690 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
11693 /* Determine if a given RTX is a valid constant. We already know this
11694 satisfies CONSTANT_P. */
11697 legitimate_constant_p (rtx x
)
11699 switch (GET_CODE (x
))
11704 if (GET_CODE (x
) == PLUS
)
11706 if (!CONST_INT_P (XEXP (x
, 1)))
11711 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
11714 /* Only some unspecs are valid as "constants". */
11715 if (GET_CODE (x
) == UNSPEC
)
11716 switch (XINT (x
, 1))
11719 case UNSPEC_GOTOFF
:
11720 case UNSPEC_PLTOFF
:
11721 return TARGET_64BIT
;
11723 case UNSPEC_NTPOFF
:
11724 x
= XVECEXP (x
, 0, 0);
11725 return (GET_CODE (x
) == SYMBOL_REF
11726 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
11727 case UNSPEC_DTPOFF
:
11728 x
= XVECEXP (x
, 0, 0);
11729 return (GET_CODE (x
) == SYMBOL_REF
11730 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
11735 /* We must have drilled down to a symbol. */
11736 if (GET_CODE (x
) == LABEL_REF
)
11738 if (GET_CODE (x
) != SYMBOL_REF
)
11743 /* TLS symbols are never valid. */
11744 if (SYMBOL_REF_TLS_MODEL (x
))
11747 /* DLLIMPORT symbols are never valid. */
11748 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11749 && SYMBOL_REF_DLLIMPORT_P (x
))
11753 /* mdynamic-no-pic */
11754 if (MACHO_DYNAMIC_NO_PIC_P
)
11755 return machopic_symbol_defined_p (x
);
11760 if (GET_MODE (x
) == TImode
11761 && x
!= CONST0_RTX (TImode
)
11767 if (!standard_sse_constant_p (x
))
11774 /* Otherwise we handle everything else in the move patterns. */
11778 /* Determine if it's legal to put X into the constant pool. This
11779 is not possible for the address of thread-local symbols, which
11780 is checked above. */
11783 ix86_cannot_force_const_mem (rtx x
)
11785 /* We can always put integral constants and vectors in memory. */
11786 switch (GET_CODE (x
))
11796 return !legitimate_constant_p (x
);
11800 /* Nonzero if the constant value X is a legitimate general operand
11801 when generating PIC code. It is given that flag_pic is on and
11802 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11805 legitimate_pic_operand_p (rtx x
)
11809 switch (GET_CODE (x
))
11812 inner
= XEXP (x
, 0);
11813 if (GET_CODE (inner
) == PLUS
11814 && CONST_INT_P (XEXP (inner
, 1)))
11815 inner
= XEXP (inner
, 0);
11817 /* Only some unspecs are valid as "constants". */
11818 if (GET_CODE (inner
) == UNSPEC
)
11819 switch (XINT (inner
, 1))
11822 case UNSPEC_GOTOFF
:
11823 case UNSPEC_PLTOFF
:
11824 return TARGET_64BIT
;
11826 x
= XVECEXP (inner
, 0, 0);
11827 return (GET_CODE (x
) == SYMBOL_REF
11828 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
11829 case UNSPEC_MACHOPIC_OFFSET
:
11830 return legitimate_pic_address_disp_p (x
);
11838 return legitimate_pic_address_disp_p (x
);
11845 /* Determine if a given CONST RTX is a valid memory displacement
11849 legitimate_pic_address_disp_p (rtx disp
)
11853 /* In 64bit mode we can allow direct addresses of symbols and labels
11854 when they are not dynamic symbols. */
11857 rtx op0
= disp
, op1
;
11859 switch (GET_CODE (disp
))
11865 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
11867 op0
= XEXP (XEXP (disp
, 0), 0);
11868 op1
= XEXP (XEXP (disp
, 0), 1);
11869 if (!CONST_INT_P (op1
)
11870 || INTVAL (op1
) >= 16*1024*1024
11871 || INTVAL (op1
) < -16*1024*1024)
11873 if (GET_CODE (op0
) == LABEL_REF
)
11875 if (GET_CODE (op0
) != SYMBOL_REF
)
11880 /* TLS references should always be enclosed in UNSPEC. */
11881 if (SYMBOL_REF_TLS_MODEL (op0
))
11883 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
11884 && ix86_cmodel
!= CM_LARGE_PIC
)
11892 if (GET_CODE (disp
) != CONST
)
11894 disp
= XEXP (disp
, 0);
11898 /* We are unsafe to allow PLUS expressions. This limit allowed distance
11899 of GOT tables. We should not need these anyway. */
11900 if (GET_CODE (disp
) != UNSPEC
11901 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
11902 && XINT (disp
, 1) != UNSPEC_GOTOFF
11903 && XINT (disp
, 1) != UNSPEC_PCREL
11904 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
11907 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
11908 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
11914 if (GET_CODE (disp
) == PLUS
)
11916 if (!CONST_INT_P (XEXP (disp
, 1)))
11918 disp
= XEXP (disp
, 0);
11922 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
11925 if (GET_CODE (disp
) != UNSPEC
)
11928 switch (XINT (disp
, 1))
11933 /* We need to check for both symbols and labels because VxWorks loads
11934 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11936 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
11937 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
11938 case UNSPEC_GOTOFF
:
11939 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11940 While ABI specify also 32bit relocation but we don't produce it in
11941 small PIC model at all. */
11942 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
11943 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
11945 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
11947 case UNSPEC_GOTTPOFF
:
11948 case UNSPEC_GOTNTPOFF
:
11949 case UNSPEC_INDNTPOFF
:
11952 disp
= XVECEXP (disp
, 0, 0);
11953 return (GET_CODE (disp
) == SYMBOL_REF
11954 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
11955 case UNSPEC_NTPOFF
:
11956 disp
= XVECEXP (disp
, 0, 0);
11957 return (GET_CODE (disp
) == SYMBOL_REF
11958 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
11959 case UNSPEC_DTPOFF
:
11960 disp
= XVECEXP (disp
, 0, 0);
11961 return (GET_CODE (disp
) == SYMBOL_REF
11962 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
11968 /* Recognizes RTL expressions that are valid memory addresses for an
11969 instruction. The MODE argument is the machine mode for the MEM
11970 expression that wants to use this address.
11972 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
11973 convert common non-canonical forms to canonical form so that they will
11977 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
11978 rtx addr
, bool strict
)
11980 struct ix86_address parts
;
11981 rtx base
, index
, disp
;
11982 HOST_WIDE_INT scale
;
11984 if (ix86_decompose_address (addr
, &parts
) <= 0)
11985 /* Decomposition failed. */
11989 index
= parts
.index
;
11991 scale
= parts
.scale
;
11993 /* Validate base register.
11995 Don't allow SUBREG's that span more than a word here. It can lead to spill
11996 failures when the base is one word out of a two word structure, which is
11997 represented internally as a DImode int. */
12005 else if (GET_CODE (base
) == SUBREG
12006 && REG_P (SUBREG_REG (base
))
12007 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
12009 reg
= SUBREG_REG (base
);
12011 /* Base is not a register. */
12014 if (GET_MODE (base
) != Pmode
)
12015 /* Base is not in Pmode. */
12018 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12019 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12020 /* Base is not valid. */
12024 /* Validate index register.
12026 Don't allow SUBREG's that span more than a word here -- same as above. */
12034 else if (GET_CODE (index
) == SUBREG
12035 && REG_P (SUBREG_REG (index
))
12036 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
12038 reg
= SUBREG_REG (index
);
12040 /* Index is not a register. */
12043 if (GET_MODE (index
) != Pmode
)
12044 /* Index is not in Pmode. */
12047 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12048 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12049 /* Index is not valid. */
12053 /* Validate scale factor. */
12057 /* Scale without index. */
12060 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12061 /* Scale is not a valid multiplier. */
12065 /* Validate displacement. */
12068 if (GET_CODE (disp
) == CONST
12069 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12070 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12071 switch (XINT (XEXP (disp
, 0), 1))
12073 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12074 used. While ABI specify also 32bit relocations, we don't produce
12075 them at all and use IP relative instead. */
12077 case UNSPEC_GOTOFF
:
12078 gcc_assert (flag_pic
);
12080 goto is_legitimate_pic
;
12082 /* 64bit address unspec. */
12085 case UNSPEC_GOTPCREL
:
12087 gcc_assert (flag_pic
);
12088 goto is_legitimate_pic
;
12090 case UNSPEC_GOTTPOFF
:
12091 case UNSPEC_GOTNTPOFF
:
12092 case UNSPEC_INDNTPOFF
:
12093 case UNSPEC_NTPOFF
:
12094 case UNSPEC_DTPOFF
:
12097 case UNSPEC_STACK_CHECK
:
12098 gcc_assert (flag_split_stack
);
12102 /* Invalid address unspec. */
12106 else if (SYMBOLIC_CONST (disp
)
12110 && MACHOPIC_INDIRECT
12111 && !machopic_operand_p (disp
)
12117 if (TARGET_64BIT
&& (index
|| base
))
12119 /* foo@dtpoff(%rX) is ok. */
12120 if (GET_CODE (disp
) != CONST
12121 || GET_CODE (XEXP (disp
, 0)) != PLUS
12122 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12123 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12124 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12125 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12126 /* Non-constant pic memory reference. */
12129 else if ((!TARGET_MACHO
|| flag_pic
)
12130 && ! legitimate_pic_address_disp_p (disp
))
12131 /* Displacement is an invalid pic construct. */
12134 else if (MACHO_DYNAMIC_NO_PIC_P
&& !legitimate_constant_p (disp
))
12135 /* displacment must be referenced via non_lazy_pointer */
12139 /* This code used to verify that a symbolic pic displacement
12140 includes the pic_offset_table_rtx register.
12142 While this is good idea, unfortunately these constructs may
12143 be created by "adds using lea" optimization for incorrect
12152 This code is nonsensical, but results in addressing
12153 GOT table with pic_offset_table_rtx base. We can't
12154 just refuse it easily, since it gets matched by
12155 "addsi3" pattern, that later gets split to lea in the
12156 case output register differs from input. While this
12157 can be handled by separate addsi pattern for this case
12158 that never results in lea, this seems to be easier and
12159 correct fix for crash to disable this test. */
12161 else if (GET_CODE (disp
) != LABEL_REF
12162 && !CONST_INT_P (disp
)
12163 && (GET_CODE (disp
) != CONST
12164 || !legitimate_constant_p (disp
))
12165 && (GET_CODE (disp
) != SYMBOL_REF
12166 || !legitimate_constant_p (disp
)))
12167 /* Displacement is not constant. */
12169 else if (TARGET_64BIT
12170 && !x86_64_immediate_operand (disp
, VOIDmode
))
12171 /* Displacement is out of range. */
12175 /* Everything looks valid. */
12179 /* Determine if a given RTX is a valid constant address. */
12182 constant_address_p (rtx x
)
12184 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
12187 /* Return a unique alias set for the GOT. */
12189 static alias_set_type
12190 ix86_GOT_alias_set (void)
12192 static alias_set_type set
= -1;
12194 set
= new_alias_set ();
12198 /* Return a legitimate reference for ORIG (an address) using the
12199 register REG. If REG is 0, a new pseudo is generated.
12201 There are two types of references that must be handled:
12203 1. Global data references must load the address from the GOT, via
12204 the PIC reg. An insn is emitted to do this load, and the reg is
12207 2. Static data references, constant pool addresses, and code labels
12208 compute the address as an offset from the GOT, whose base is in
12209 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12210 differentiate them from global data objects. The returned
12211 address is the PIC reg + an unspec constant.
12213 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12214 reg also appears in the address. */
12217 legitimize_pic_address (rtx orig
, rtx reg
)
12220 rtx new_rtx
= orig
;
12224 if (TARGET_MACHO
&& !TARGET_64BIT
)
12227 reg
= gen_reg_rtx (Pmode
);
12228 /* Use the generic Mach-O PIC machinery. */
12229 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12233 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12235 else if (TARGET_64BIT
12236 && ix86_cmodel
!= CM_SMALL_PIC
12237 && gotoff_operand (addr
, Pmode
))
12240 /* This symbol may be referenced via a displacement from the PIC
12241 base address (@GOTOFF). */
12243 if (reload_in_progress
)
12244 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12245 if (GET_CODE (addr
) == CONST
)
12246 addr
= XEXP (addr
, 0);
12247 if (GET_CODE (addr
) == PLUS
)
12249 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12251 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12254 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12255 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12257 tmpreg
= gen_reg_rtx (Pmode
);
12260 emit_move_insn (tmpreg
, new_rtx
);
12264 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12265 tmpreg
, 1, OPTAB_DIRECT
);
12268 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
12270 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
12272 /* This symbol may be referenced via a displacement from the PIC
12273 base address (@GOTOFF). */
12275 if (reload_in_progress
)
12276 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12277 if (GET_CODE (addr
) == CONST
)
12278 addr
= XEXP (addr
, 0);
12279 if (GET_CODE (addr
) == PLUS
)
12281 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12283 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12286 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12287 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12288 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12292 emit_move_insn (reg
, new_rtx
);
12296 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
12297 /* We can't use @GOTOFF for text labels on VxWorks;
12298 see gotoff_operand. */
12299 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
12301 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12303 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
12304 return legitimize_dllimport_symbol (addr
, true);
12305 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
12306 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
12307 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
12309 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), true);
12310 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
12314 /* For x64 PE-COFF there is no GOT table. So we use address
12316 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
12318 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
12319 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12322 reg
= gen_reg_rtx (Pmode
);
12323 emit_move_insn (reg
, new_rtx
);
12326 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
12328 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
12329 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12330 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12331 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12334 reg
= gen_reg_rtx (Pmode
);
12335 /* Use directly gen_movsi, otherwise the address is loaded
12336 into register for CSE. We don't want to CSE this addresses,
12337 instead we CSE addresses from the GOT table, so skip this. */
12338 emit_insn (gen_movsi (reg
, new_rtx
));
12343 /* This symbol must be referenced via a load from the
12344 Global Offset Table (@GOT). */
12346 if (reload_in_progress
)
12347 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12348 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
12349 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12351 new_rtx
= force_reg (Pmode
, new_rtx
);
12352 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12353 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12354 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12357 reg
= gen_reg_rtx (Pmode
);
12358 emit_move_insn (reg
, new_rtx
);
12364 if (CONST_INT_P (addr
)
12365 && !x86_64_immediate_operand (addr
, VOIDmode
))
12369 emit_move_insn (reg
, addr
);
12373 new_rtx
= force_reg (Pmode
, addr
);
12375 else if (GET_CODE (addr
) == CONST
)
12377 addr
= XEXP (addr
, 0);
12379 /* We must match stuff we generate before. Assume the only
12380 unspecs that can get here are ours. Not that we could do
12381 anything with them anyway.... */
12382 if (GET_CODE (addr
) == UNSPEC
12383 || (GET_CODE (addr
) == PLUS
12384 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
12386 gcc_assert (GET_CODE (addr
) == PLUS
);
12388 if (GET_CODE (addr
) == PLUS
)
12390 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
12392 /* Check first to see if this is a constant offset from a @GOTOFF
12393 symbol reference. */
12394 if (gotoff_operand (op0
, Pmode
)
12395 && CONST_INT_P (op1
))
12399 if (reload_in_progress
)
12400 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12401 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
12403 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
12404 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12405 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12409 emit_move_insn (reg
, new_rtx
);
12415 if (INTVAL (op1
) < -16*1024*1024
12416 || INTVAL (op1
) >= 16*1024*1024)
12418 if (!x86_64_immediate_operand (op1
, Pmode
))
12419 op1
= force_reg (Pmode
, op1
);
12420 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
12426 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
12427 new_rtx
= legitimize_pic_address (XEXP (addr
, 1),
12428 base
== reg
? NULL_RTX
: reg
);
12430 if (CONST_INT_P (new_rtx
))
12431 new_rtx
= plus_constant (base
, INTVAL (new_rtx
));
12434 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
12436 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
12437 new_rtx
= XEXP (new_rtx
, 1);
12439 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
12447 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12450 get_thread_pointer (int to_reg
)
12454 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
12458 reg
= gen_reg_rtx (Pmode
);
12459 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
12460 insn
= emit_insn (insn
);
12465 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12466 false if we expect this to be used for a memory address and true if
12467 we expect to load the address into a register. */
12470 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
12472 rtx dest
, base
, off
, pic
, tp
;
12477 case TLS_MODEL_GLOBAL_DYNAMIC
:
12478 dest
= gen_reg_rtx (Pmode
);
12479 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
12481 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
12483 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
;
12486 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
12487 insns
= get_insns ();
12490 RTL_CONST_CALL_P (insns
) = 1;
12491 emit_libcall_block (insns
, dest
, rax
, x
);
12493 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
12494 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
12496 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
12498 if (TARGET_GNU2_TLS
)
12500 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
12502 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
12506 case TLS_MODEL_LOCAL_DYNAMIC
:
12507 base
= gen_reg_rtx (Pmode
);
12508 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
12510 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
12512 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
, note
;
12515 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
12516 insns
= get_insns ();
12519 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
12520 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
12521 RTL_CONST_CALL_P (insns
) = 1;
12522 emit_libcall_block (insns
, base
, rax
, note
);
12524 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
12525 emit_insn (gen_tls_local_dynamic_base_64 (base
));
12527 emit_insn (gen_tls_local_dynamic_base_32 (base
));
12529 if (TARGET_GNU2_TLS
)
12531 rtx x
= ix86_tls_module_base ();
12533 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
12534 gen_rtx_MINUS (Pmode
, x
, tp
));
12537 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
12538 off
= gen_rtx_CONST (Pmode
, off
);
12540 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
12542 if (TARGET_GNU2_TLS
)
12544 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
12546 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
12551 case TLS_MODEL_INITIAL_EXEC
:
12555 type
= UNSPEC_GOTNTPOFF
;
12559 if (reload_in_progress
)
12560 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12561 pic
= pic_offset_table_rtx
;
12562 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
12564 else if (!TARGET_ANY_GNU_TLS
)
12566 pic
= gen_reg_rtx (Pmode
);
12567 emit_insn (gen_set_got (pic
));
12568 type
= UNSPEC_GOTTPOFF
;
12573 type
= UNSPEC_INDNTPOFF
;
12576 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
12577 off
= gen_rtx_CONST (Pmode
, off
);
12579 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
12580 off
= gen_const_mem (Pmode
, off
);
12581 set_mem_alias_set (off
, ix86_GOT_alias_set ());
12583 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12585 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12586 off
= force_reg (Pmode
, off
);
12587 return gen_rtx_PLUS (Pmode
, base
, off
);
12591 base
= get_thread_pointer (true);
12592 dest
= gen_reg_rtx (Pmode
);
12593 emit_insn (gen_subsi3 (dest
, base
, off
));
12597 case TLS_MODEL_LOCAL_EXEC
:
12598 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
12599 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12600 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
12601 off
= gen_rtx_CONST (Pmode
, off
);
12603 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12605 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12606 return gen_rtx_PLUS (Pmode
, base
, off
);
12610 base
= get_thread_pointer (true);
12611 dest
= gen_reg_rtx (Pmode
);
12612 emit_insn (gen_subsi3 (dest
, base
, off
));
12617 gcc_unreachable ();
12623 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* NOTE(review): lossy extraction -- the embedded numbers are original source
   line numbers, and the jump 12623 -> 12626 shows that the rest of this
   comment was dropped.  Confirm against a clean i386.c.  */
/* GC-managed hash table caching one __imp_ symbol decl per dllimport'd decl;
   entries are tree_map pairs, kept alive only while marked
   (if_marked "tree_map_marked_p").  Lazily created in get_dllimport_decl.  */
12626 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
12627 htab_t dllimport_map
;
/* Return the cached VAR_DECL holding the __imp_DECL indirection slot for
   DECL, creating it on first use.  The result is a pointer-typed, external,
   read-only artificial decl whose DECL_RTL is a const MEM of the __imp_
   symbol in the GOT alias set.
   NOTE(review): lossy extraction -- the return type line, braces, and the
   "if (h) return h->to;" fast path (original lines where the embedded
   numbering jumps, e.g. 12648-12650) are missing from this view.  */
12630 get_dllimport_decl (tree decl
)
12632 struct tree_map
*h
, in
;
12635 const char *prefix
;
12636 size_t namelen
, prefixlen
;
/* Lazily create the decl -> __imp_ symbol cache.  */
12641 if (!dllimport_map
)
12642 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
/* Look DECL up (INSERT reserves a slot if absent).  */
12644 in
.hash
= htab_hash_pointer (decl
);
12645 in
.base
.from
= decl
;
12646 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
12647 h
= (struct tree_map
*) *loc
;
/* Cache miss: build a fresh artificial pointer decl for the import slot.  */
12651 *loc
= h
= ggc_alloc_tree_map ();
12653 h
->base
.from
= decl
;
12654 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
12655 VAR_DECL
, NULL
, ptr_type_node
);
12656 DECL_ARTIFICIAL (to
) = 1;
12657 DECL_IGNORED_P (to
) = 1;
12658 DECL_EXTERNAL (to
) = 1;
12659 TREE_READONLY (to
) = 1;
/* Compose the assembler name "*__imp_<name>" (one underscore when the
   target uses no user label prefix or the name is fastcall-prefixed,
   two otherwise).  */
12661 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
12662 name
= targetm
.strip_name_encoding (name
);
12663 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
12664 ? "*__imp_" : "*__imp__";
12665 namelen
= strlen (name
);
12666 prefixlen
= strlen (prefix
);
12667 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
12668 memcpy (imp_name
, prefix
, prefixlen
);
12669 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
/* Move the composed name to GC storage and wrap it as a local SYMBOL_REF.  */
12671 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
12672 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
12673 SET_SYMBOL_REF_DECL (rtl
, to
);
12674 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
/* The decl's RTL is a read-only load through the import slot; tag it with
   the GOT alias set so it does not conflict with ordinary memory.  */
12676 rtl
= gen_const_mem (Pmode
, rtl
);
12677 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
12679 SET_DECL_RTL (to
, rtl
);
12680 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
12685 /* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
12686 true if we require the result be a register. */
/* NOTE(review): lossy extraction -- the return type line and the trailing
   "return x;" of this function are not visible here (embedded source line
   numbers jump around 12689/12699).  */
12689 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
/* A dllimport'd SYMBOL_REF must carry its decl; get_dllimport_decl needs it
   to build/lookup the __imp_ slot.  */
12694 gcc_assert (SYMBOL_REF_DECL (symbol
));
12695 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
/* The import decl's RTL is a const MEM through the __imp_ slot.  */
12697 x
= DECL_RTL (imp_decl
);
/* Presumably only when want_reg -- the guard line is missing; verify.  */
12699 x
= force_reg (Pmode
, x
);
12703 /* Try machine-dependent ways of modifying an illegitimate address
12704 to be legitimate. If we find one, return the new, valid address.
12705 This macro is used in only one place: `memory_address' in explow.c.
12707 OLDX is the address as it was before break_out_memory_refs was called.
12708 In some cases it is useful to look at this to decide what needs to be done.
12710 It is always safe for this macro to do nothing. It exists to recognize
12711 opportunities to optimize the output.
12713 For the 80386, we handle X+REG by loading X into a register R and
12714 using R+REG. R will go in a general reg and indexing will be used.
12715 However, if REG is a broken-out memory address or multiplication,
12716 nothing needs to be done because REG can certainly go in a general reg.
12718 When -fpic is used, special handling is needed for symbolic references.
12719 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): lossy extraction -- braces, `return` guards, and several
   statements are missing wherever the embedded source line numbers jump
   (e.g. 12724-12727, the `changed = true;` updates, and the final return).
   Comments below describe the visible logic only.  */
12722 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
12723 enum machine_mode mode
)
/* TLS symbols get their own lowering path entirely.  */
12728 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
12730 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
/* (const (plus TLS_SYM off)): legitimize the symbol, re-add the offset.  */
12731 if (GET_CODE (x
) == CONST
12732 && GET_CODE (XEXP (x
, 0)) == PLUS
12733 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
12734 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
12736 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
12737 (enum tls_model
) log
, false);
12738 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
/* dllimport symbols are rewritten to loads through their __imp_ slot.  */
12741 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12743 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
12744 return legitimize_dllimport_symbol (x
, true);
12745 if (GET_CODE (x
) == CONST
12746 && GET_CODE (XEXP (x
, 0)) == PLUS
12747 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
12748 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
12750 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
12751 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
/* Symbolic constants under -fpic go through the PIC legitimizer.  */
12755 if (flag_pic
&& SYMBOLIC_CONST (x
))
12756 return legitimize_pic_address (x
, 0);
/* Darwin -mdynamic-no-pic: indirect data references instead.  */
12759 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
12760 return machopic_indirect_data_reference (x
, 0);
12763 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12764 if (GET_CODE (x
) == ASHIFT
12765 && CONST_INT_P (XEXP (x
, 1))
12766 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
12769 log
= INTVAL (XEXP (x
, 1));
12770 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
12771 GEN_INT (1 << log
));
12774 if (GET_CODE (x
) == PLUS
)
12776 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12778 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
12779 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
12780 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
12783 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
12784 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
12785 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
12786 GEN_INT (1 << log
));
/* Same canonicalization for a shift in the second operand.  */
12789 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
12790 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
12791 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
12794 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
12795 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
12796 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
12797 GEN_INT (1 << log
));
12800 /* Put multiply first if it isn't already. */
12801 if (GET_CODE (XEXP (x
, 1)) == MULT
)
12803 rtx tmp
= XEXP (x
, 0);
12804 XEXP (x
, 0) = XEXP (x
, 1);
/* (the matching "XEXP (x, 1) = tmp;" swap line is missing here)  */
12809 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12810 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12811 created by virtual register instantiation, register elimination, and
12812 similar optimizations. */
12813 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
12816 x
= gen_rtx_PLUS (Pmode
,
12817 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
12818 XEXP (XEXP (x
, 1), 0)),
12819 XEXP (XEXP (x
, 1), 1));
/* (start of this comment was dropped; it canonicalizes the nested form:)  */
12823 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12824 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12825 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
12826 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
12827 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
12828 && CONSTANT_P (XEXP (x
, 1)))
12831 rtx other
= NULL_RTX
;
/* Pick whichever of the two trailing operands is the CONST_INT.  */
12833 if (CONST_INT_P (XEXP (x
, 1)))
12835 constant
= XEXP (x
, 1);
12836 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
12838 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
12840 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
12841 other
= XEXP (x
, 1);
12849 x
= gen_rtx_PLUS (Pmode
,
12850 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
12851 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
12852 plus_constant (other
, INTVAL (constant
)));
/* Early exit once the rewritten address is already legitimate.  */
12856 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
/* Force MULT sub-expressions into registers/operands.  */
12859 if (GET_CODE (XEXP (x
, 0)) == MULT
)
12862 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
12865 if (GET_CODE (XEXP (x
, 1)) == MULT
)
12868 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
/* (the leading condition of this test was dropped)  */
12872 && REG_P (XEXP (x
, 1))
12873 && REG_P (XEXP (x
, 0)))
/* PIC symbolic addend inside a PLUS: legitimize the whole address.  */
12876 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
12879 x
= legitimize_pic_address (x
, 0);
12882 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
/* Fall back: materialize the non-register half into a fresh pseudo.  */
12885 if (REG_P (XEXP (x
, 0)))
12887 rtx temp
= gen_reg_rtx (Pmode
);
12888 rtx val
= force_operand (XEXP (x
, 1), temp
);
12890 emit_move_insn (temp
, val
);
12892 XEXP (x
, 1) = temp
;
12896 else if (REG_P (XEXP (x
, 1)))
12898 rtx temp
= gen_reg_rtx (Pmode
);
12899 rtx val
= force_operand (XEXP (x
, 0), temp
);
12901 emit_move_insn (temp
, val
);
12903 XEXP (x
, 0) = temp
;
12911 /* Print an integer constant expression in assembler syntax. Addition
12912 and subtraction are the only arithmetic that may appear in these
12913 expressions. FILE is the stdio stream to write to, X is the rtx, and
12914 CODE is the operand print code from the output string. */
/* NOTE(review): lossy extraction -- case labels, breaks, and braces are
   missing wherever the embedded source line numbers jump; the dispatch
   structure below must be checked against a clean i386.c.  */
12917 output_pic_addr_const (FILE *file
, rtx x
, int code
)
12921 switch (GET_CODE (x
))
/* (case PC, presumably) -- only meaningful under -fpic.  */
12924 gcc_assert (flag_pic
);
/* SYMBOL_REF: plain output unless Darwin branch islands apply.  */
12929 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
12930 output_addr_const (file
, x
);
12933 const char *name
= XSTR (x
, 0);
12935 /* Mark the decl as referenced so that cgraph will
12936 output the function. */
12937 if (SYMBOL_REF_DECL (x
))
12938 mark_decl_referenced (SYMBOL_REF_DECL (x
));
/* Darwin: route undefined functions through their indirection stub.  */
12941 if (MACHOPIC_INDIRECT
12942 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
12943 name
= machopic_indirection_name (x
, /*stub_p=*/true);
12945 assemble_name (file
, name
);
/* 'P' print code: append @PLT for non-local symbols on ELF targets.  */
12947 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
12948 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
12949 fputs ("@PLT", file
);
/* (CODE_LABEL / LABEL_REF case)  */
12956 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
12957 assemble_name (asm_out_file
, buf
);
/* (CONST_INT case)  */
12961 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
12965 /* This used to output parentheses around the expression,
12966 but that does not work on the 386 (either ATT or BSD assembler). */
12967 output_pic_addr_const (file
, XEXP (x
, 0), code
);
/* (CONST_DOUBLE case) -- only integer-mode CONST_DOUBLEs are handled.  */
12971 if (GET_MODE (x
) == VOIDmode
)
12973 /* We can use %d if the number is <32 bits and positive. */
12974 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
12975 fprintf (file
, "0x%lx%08lx",
12976 (unsigned long) CONST_DOUBLE_HIGH (x
),
12977 (unsigned long) CONST_DOUBLE_LOW (x
));
12979 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
12982 /* We can't handle floating point constants;
12983 TARGET_PRINT_OPERAND must handle them. */
12984 output_operand_lossage ("floating constant misused");
/* (PLUS case)  */
12988 /* Some assemblers need integer constants to appear first. */
12989 if (CONST_INT_P (XEXP (x
, 0)))
12991 output_pic_addr_const (file
, XEXP (x
, 0), code
);
12993 output_pic_addr_const (file
, XEXP (x
, 1), code
);
/* (the other PLUS ordering; then the MINUS case below)  */
12997 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
12998 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13000 output_pic_addr_const (file
, XEXP (x
, 0), code
);
/* MINUS: Intel dialect wraps in parens, AT&T in brackets.  */
13006 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13007 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13009 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13011 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
/* (UNSPEC case)  */
13015 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13017 bool f
= i386_asm_output_addr_const_extra (file
, x
);
/* All remaining unspecs wrap exactly one operand; print it, then the
   relocation suffix selected by the unspec number.  */
13022 gcc_assert (XVECLEN (x
, 0) == 1);
13023 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13024 switch (XINT (x
, 1))
13027 fputs ("@GOT", file
);
13029 case UNSPEC_GOTOFF
:
13030 fputs ("@GOTOFF", file
);
13032 case UNSPEC_PLTOFF
:
13033 fputs ("@PLTOFF", file
);
13036 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13037 "(%rip)" : "[rip]", file
);
13039 case UNSPEC_GOTPCREL
:
13040 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13041 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13043 case UNSPEC_GOTTPOFF
:
13044 /* FIXME: This might be @TPOFF in Sun ld too. */
13045 fputs ("@gottpoff", file
);
13048 fputs ("@tpoff", file
);
13050 case UNSPEC_NTPOFF
:
/* 64-bit uses @tpoff, 32-bit @ntpoff -- the guard line was dropped.  */
13052 fputs ("@tpoff", file
);
13054 fputs ("@ntpoff", file
);
13056 case UNSPEC_DTPOFF
:
13057 fputs ("@dtpoff", file
);
13059 case UNSPEC_GOTNTPOFF
:
13061 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13062 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13064 fputs ("@gotntpoff", file
);
13066 case UNSPEC_INDNTPOFF
:
13067 fputs ("@indntpoff", file
);
13070 case UNSPEC_MACHOPIC_OFFSET
:
13072 machopic_output_function_base_name (file
);
13076 output_operand_lossage ("invalid UNSPEC as operand");
13082 output_operand_lossage ("invalid expression as operand");
13086 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13087 We need to emit DTP-relative relocations. */
/* NOTE(review): lossy extraction -- the switch on SIZE (selecting the
   assembler directive and the ", 0" padding for 8-byte values) lost its
   case labels; structure below is partial.  */
13089 static void ATTRIBUTE_UNUSED
13090 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
/* Emit a .long-style directive, the address, and the @dtpoff reloc.  */
13092 fputs (ASM_LONG
, file
);
13093 output_addr_const (file
, x
);
13094 fputs ("@dtpoff", file
);
/* Presumably the size == 8 branch pads the upper word with ", 0".  */
13100 fputs (", 0", file
);
/* Any other SIZE is a caller bug.  */
13103 gcc_unreachable ();
13107 /* Return true if X is a representation of the PIC register. This copes
13108 with calls from ix86_find_base_term, where the register might have
13109 been replaced by a cselib value. */
13112 ix86_pic_register_p (rtx x
)
/* A cselib VALUE stands in for the register during alias analysis; compare
   it against pic_offset_table_rtx via cselib equality.  */
13114 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
13115 return (pic_offset_table_rtx
&& rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
/* Otherwise a plain REG check on the PIC register number suffices.  */
13118 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
13121 /* Helper function for ix86_delegitimize_address.
13122 Attempt to delegitimize TLS local-exec accesses. */
/* NOTE(review): lossy extraction -- early "return orig_x;" lines and some
   guards are missing where the embedded line numbers jump (e.g. 13131-13133,
   13150-13151, 13156-13157); verify against a clean i386.c.  */
13125 ix86_delegitimize_tls_address (rtx orig_x
)
13127 rtx x
= orig_x
, unspec
;
13128 struct ix86_address addr
;
/* Seg-register TLS addressing is only used with direct seg refs.  */
13130 if (!TARGET_TLS_DIRECT_SEG_REFS
)
/* (MEM unwrapping presumably happened in the dropped lines)  */
13134 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
/* Only %fs (64-bit) / %gs (32-bit) based addresses with a CONST
   displacement can be a local-exec access.  */
13136 if (ix86_decompose_address (x
, &addr
) == 0
13137 || addr
.seg
!= (TARGET_64BIT
? SEG_FS
: SEG_GS
)
13138 || addr
.disp
== NULL_RTX
13139 || GET_CODE (addr
.disp
) != CONST
)
13141 unspec
= XEXP (addr
.disp
, 0);
/* Strip an integer offset wrapped around the unspec.  */
13142 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
13143 unspec
= XEXP (unspec
, 0);
13144 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
/* The TLS symbol sits inside the NTPOFF unspec.  */
13146 x
= XVECEXP (unspec
, 0, 0);
13147 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
/* Re-attach the stripped constant offset, if any.  */
13148 if (unspec
!= XEXP (addr
.disp
, 0))
13149 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
/* Rebuild index*scale and base around the symbol.  */
13152 rtx idx
= addr
.index
;
13153 if (addr
.scale
!= 1)
13154 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
13155 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
13158 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
/* If the caller gave us a MEM, keep its attributes on the new address.  */
13159 if (MEM_P (orig_x
))
13160 x
= replace_equiv_address_nv (orig_x
, x
);
13164 /* In the name of slightly smaller debug output, and to cater to
13165 general assembler lossage, recognize PIC+GOTOFF and turn it back
13166 into a direct symbol reference.
13168 On Darwin, this is necessary to avoid a crash, because Darwin
13169 has a different PIC label for each routine but the DWARF debugging
13170 information is not associated with any particular routine, so it's
13171 necessary to remove references to the PIC label from RTL stored by
13172 the DWARF output code. */
/* NOTE(review): lossy extraction -- the TARGET_64BIT branch structure,
   several returns, and guard conditions are missing where the embedded
   line numbers jump (13187-13194, 13204-13206, 13226-13228, ...).  */
13175 ix86_delegitimize_address (rtx x
)
13177 rtx orig_x
= delegitimize_mem_from_attrs (x
);
13178 /* addend is NULL or some rtx if x is something+GOTOFF where
13179 something doesn't include the PIC register. */
13180 rtx addend
= NULL_RTX
;
13181 /* reg_addend is NULL or a multiple of some register. */
13182 rtx reg_addend
= NULL_RTX
;
13183 /* const_addend is NULL or a const_int. */
13184 rtx const_addend
= NULL_RTX
;
13185 /* This is the result, or NULL. */
13186 rtx result
= NULL_RTX
;
/* 64-bit path: only (const (unspec [...] GOTPCREL/PCREL)) inside a MEM
   can be delegitimized; anything else may still be a TLS access.  */
13195 if (GET_CODE (x
) != CONST
13196 || GET_CODE (XEXP (x
, 0)) != UNSPEC
13197 || (XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
&& XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
)
13199 || !MEM_P (orig_x
))
13200 return ix86_delegitimize_tls_address (orig_x
);
13201 x
= XVECEXP (XEXP (x
, 0), 0, 0);
13202 if (GET_MODE (orig_x
) != Pmode
)
13203 return simplify_gen_subreg (GET_MODE (orig_x
), x
, Pmode
, 0);
/* 32-bit path: expect (plus <something> (const ...)).  */
13207 if (GET_CODE (x
) != PLUS
13208 || GET_CODE (XEXP (x
, 1)) != CONST
)
13209 return ix86_delegitimize_tls_address (orig_x
);
13211 if (ix86_pic_register_p (XEXP (x
, 0)))
13212 /* %ebx + GOT/GOTOFF */
13214 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
13216 /* %ebx + %reg * scale + GOT/GOTOFF */
13217 reg_addend
= XEXP (x
, 0);
/* Strip the PIC register out of the inner PLUS, keeping the rest.  */
13218 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
13219 reg_addend
= XEXP (reg_addend
, 1);
13220 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
13221 reg_addend
= XEXP (reg_addend
, 0);
/* Neither operand is the PIC register: remember the whole addend.  */
13224 reg_addend
= NULL_RTX
;
13225 addend
= XEXP (x
, 0);
13229 addend
= XEXP (x
, 0);
/* Descend into the CONST and peel a trailing integer offset.  */
13231 x
= XEXP (XEXP (x
, 1), 0);
13232 if (GET_CODE (x
) == PLUS
13233 && CONST_INT_P (XEXP (x
, 1)))
13235 const_addend
= XEXP (x
, 1);
/* Recover the bare symbol from the GOT/GOTOFF unspec: @GOT only from a
   MEM with no stray addend, @GOTOFF only outside a MEM.  */
13239 if (GET_CODE (x
) == UNSPEC
13240 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
13241 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
13242 result
= XVECEXP (x
, 0, 0);
/* Darwin local-data PIC references delegitimize the same way.  */
13244 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
&& !MEM_P (orig_x
))
13246 result
= XVECEXP (x
, 0, 0);
/* Nothing recognized: last chance is a TLS pattern.  */
13249 return ix86_delegitimize_tls_address (orig_x
);
/* Reassemble: symbol + const offset + register addend.  */
13252 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
13254 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
13257 /* If the rest of original X doesn't involve the PIC register, add
13258 addend and subtract pic_offset_table_rtx. This can happen e.g.
13260 leal (%ebx, %ecx, 4), %ecx
13262 movl foo@GOTOFF(%ecx), %edx
13263 in which case we return (%ecx - %ebx) + foo. */
13264 if (pic_offset_table_rtx
)
13265 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
13266 pic_offset_table_rtx
),
/* (the trailing "result)" argument line and the else-return are missing)  */
13271 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
13272 return simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
13276 /* If X is a machine specific address (i.e. a symbol or label being
13277 referenced as a displacement from the GOT implemented using an
13278 UNSPEC), then return the base term. Otherwise return X. */
/* NOTE(review): lossy extraction -- the TARGET_64BIT guard and the
   "return x;" fall-throughs are missing where line numbers jump.  */
13281 ix86_find_base_term (rtx x
)
/* Fast path (presumably 64-bit only): peel GOTPCREL/PCREL unspecs.  */
13287 if (GET_CODE (x
) != CONST
)
13289 term
= XEXP (x
, 0);
/* Allow a constant offset wrapped around the unspec.  */
13290 if (GET_CODE (term
) == PLUS
13291 && (CONST_INT_P (XEXP (term
, 1))
13292 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
13293 term
= XEXP (term
, 0);
13294 if (GET_CODE (term
) != UNSPEC
13295 || (XINT (term
, 1) != UNSPEC_GOTPCREL
&& XINT (term
, 1) != UNSPEC_PCREL
))
/* The symbol/label is the unspec's sole operand.  */
13299 return XVECEXP (term
, 0, 0);
/* General case: fall back to full delegitimization.  */
13302 return ix86_delegitimize_address (x
);
/* Emit the instruction-suffix spelling of condition CODE under flags mode
   MODE to FILE.  REVERSE inverts the condition; FP selects the fcmov-style
   spellings ("nbe"/"nb"/"u"/...) instead of the integer ones.
   NOTE(review): lossy extraction -- nearly all of the switch's case labels
   and suffix assignments are gone (embedded line numbers jump 13317 -> 13368
   and repeatedly after); only scattered fragments remain.  Do not rely on
   the visible structure.  */
13306 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
13307 int fp
, FILE *file
)
13309 const char *suffix
;
/* FP compares are first mapped onto the equivalent integer condition.  */
13311 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
13313 code
= ix86_fp_compare_code_to_integer (code
);
13317 code
= reverse_condition (code
);
13368 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
13372 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13373 Those same assemblers have the same but opposite lossage on cmov. */
13374 if (mode
== CCmode
)
13375 suffix
= fp
? "nbe" : "a";
13376 else if (mode
== CCCmode
)
13379 gcc_unreachable ();
13395 gcc_unreachable ();
13399 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13416 gcc_unreachable ();
13420 /* ??? As above. */
13421 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13422 suffix
= fp
? "nb" : "ae";
13425 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
13429 /* ??? As above. */
13430 if (mode
== CCmode
)
13432 else if (mode
== CCCmode
)
13433 suffix
= fp
? "nb" : "ae";
13435 gcc_unreachable ();
/* UNORDERED / ORDERED spellings.  */
13438 suffix
= fp
? "u" : "p";
13441 suffix
= fp
? "nu" : "np";
13444 gcc_unreachable ();
/* Finally emit the chosen suffix.  */
13446 fputs (suffix
, file
);
13449 /* Print the name of register X to FILE based on its machine mode and number.
13450 If CODE is 'w', pretend the mode is HImode.
13451 If CODE is 'b', pretend the mode is QImode.
13452 If CODE is 'k', pretend the mode is SImode.
13453 If CODE is 'q', pretend the mode is DImode.
13454 If CODE is 'x', pretend the mode is V4SFmode.
13455 If CODE is 't', pretend the mode is V8SFmode.
13456 If CODE is 'h', pretend the reg is the 'high' byte register.
13457 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13458 If CODE is 'd', duplicate the operand for AVX instruction.
/* NOTE(review): lossy extraction -- the code-to-size assignments, the
   switch-on-size bodies, and several case labels are missing wherever the
   embedded line numbers jump.  */
13462 print_reg (rtx x
, int code
, FILE *file
)
13465 bool duplicated
= code
== 'd' && TARGET_AVX
;
/* Registers that never appear in asm output are rejected outright.  */
13467 gcc_assert (x
== pc_rtx
|| (REGNO (x
) != ARG_POINTER_REGNUM
&& REGNO (x
) != FRAME_POINTER_REGNUM
&& REGNO (x
) != FLAGS_REG
&& REGNO (x
) != FPSR_REG
&& REGNO (x
) != FPCR_REG
));
/* AT&T dialect prefixes register names with '%'.  */
13474 if (ASSEMBLER_DIALECT
== ASM_ATT
)
/* pc_rtx prints as "rip" -- 64-bit only.  */
13479 gcc_assert (TARGET_64BIT
);
13480 fputs ("rip", file
);
/* Map the print code to an operand size (assignments were dropped).  */
13484 if (code
== 'w' || MMX_REG_P (x
))
13486 else if (code
== 'b')
13488 else if (code
== 'k')
13490 else if (code
== 'q')
13492 else if (code
== 'y')
13494 else if (code
== 'h')
13496 else if (code
== 'x')
13498 else if (code
== 't')
/* No explicit code: take the size from the operand's own mode.  */
13501 code
= GET_MODE_SIZE (GET_MODE (x
));
13503 /* Irritatingly, AMD extended registers use different naming convention
13504 from the normal registers. */
13505 if (REX_INT_REG_P (x
))
13507 gcc_assert (TARGET_64BIT
);
/* r8..r15 have no ah/bh-style high halves.  */
13511 error ("extended registers have no high halves");
13514 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
13517 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
13520 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
13523 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
13526 error ("unsupported operand size for extended register");
/* x87 stack top handling ('y' code prints st(0)).  */
13536 if (STACK_TOP_P (x
))
/* 4/8-byte integer regs get an 'e'/'r' prefix; FP/SSE regs do not.  */
13545 if (! ANY_FP_REG_P (x
))
13546 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
13551 reg
= hi_reg_name
[REGNO (x
)];
/* Byte register: only the first few regs have QImode names.  */
13554 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
13556 reg
= qi_reg_name
[REGNO (x
)];
/* High-byte register (ah/bh/ch/dh).  */
13559 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
13561 reg
= qi_high_reg_name
[REGNO (x
)];
/* 32-byte case: 'd' duplication is not supported here.  */
13566 gcc_assert (!duplicated
);
/* Skip the leading 'x' of "xmm..." to print "ymm...".  */
13568 fputs (hi_reg_name
[REGNO (x
)] + 1, file
);
13573 gcc_unreachable ();
/* AVX 'd' code: emit the register a second time, comma-separated.  */
13579 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13580 fprintf (file
, ", %%%s", reg
);
13582 fprintf (file
, ", %s", reg
);
13586 /* Locate some local-dynamic symbol still in use by this function
13587 so that we can print its name in some tls_local_dynamic_base
/* (rest of comment and the "static int" return-type line were dropped.)
   for_each_rtx callback: records the first local-dynamic TLS SYMBOL_REF it
   sees into cfun->machine->some_ld_name and (presumably) returns nonzero
   to stop the walk -- the return statements are not visible here.  */
13591 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
13595 if (GET_CODE (x
) == SYMBOL_REF
13596 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
13598 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
/* Return the name of some local-dynamic TLS symbol referenced by the
   current function, caching it in cfun->machine->some_ld_name.  Scans all
   insn patterns via get_some_local_dynamic_name_1 on a cache miss.
   NOTE(review): the trailing gcc_unreachable()/return for the
   nothing-found case is outside the visible lines.  */
13605 static const char *
13606 get_some_local_dynamic_name (void)
/* Fast path: a previous scan already found a name.  */
13610 if (cfun
->machine
->some_ld_name
)
13611 return cfun
->machine
->some_ld_name
;
/* Walk every real insn until the callback records a name.  */
13613 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
13614 if (NONDEBUG_INSN_P (insn
)
13615 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
13616 return cfun
->machine
->some_ld_name
;
13621 /* Meaning of CODE:
13622 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13623 C -- print opcode suffix for set/cmov insn.
13624 c -- like C, but print reversed condition
13625 F,f -- likewise, but for floating-point.
13626 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13628 R -- print the prefix for register names.
13629 z -- print the opcode suffix for the size of the current operand.
13630 Z -- likewise, with special suffixes for x87 instructions.
13631 * -- print a star (in certain assembler syntax)
13632 A -- print an absolute memory reference.
13633 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13634 s -- print a shift double count, followed by the assemblers argument
13636 b -- print the QImode name of the register for the indicated operand.
13637 %b0 would print %al if operands[0] is reg 0.
13638 w -- likewise, print the HImode name of the register.
13639 k -- likewise, print the SImode name of the register.
13640 q -- likewise, print the DImode name of the register.
13641 x -- likewise, print the V4SFmode name of the register.
13642 t -- likewise, print the V8SFmode name of the register.
13643 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13644 y -- print "st(0)" instead of "st" as a register.
13645 d -- print duplicated register operand for AVX instruction.
13646 D -- print condition for SSE cmp instruction.
13647 P -- if PIC, print an @PLT suffix.
13648 X -- don't print any sort of PIC '@' suffix for a symbol.
13649 & -- print some in-use local-dynamic symbol name.
13650 H -- print a memory address offset by 8; used for sse high-parts
13651 Y -- print condition for XOP pcom* instruction.
13652 + -- print a branch hint as 'cs' or 'ds' prefix
13653 ; -- print a semicolon (after prefixes due to bug in older gas).
13654 @ -- print a segment register of thread base pointer load
13658 ix86_print_operand (FILE *file
, rtx x
, int code
)
13665 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13671 const char *name
= get_some_local_dynamic_name ();
13673 output_operand_lossage ("'%%&' used without any "
13674 "local dynamic TLS references");
13676 assemble_name (file
, name
);
13681 switch (ASSEMBLER_DIALECT
)
13688 /* Intel syntax. For absolute addresses, registers should not
13689 be surrounded by braces. */
13693 ix86_print_operand (file
, x
, 0);
13700 gcc_unreachable ();
13703 ix86_print_operand (file
, x
, 0);
13708 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13713 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13718 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13723 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13728 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13733 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13738 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
13740 /* Opcodes don't get size suffixes if using Intel opcodes. */
13741 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
13744 switch (GET_MODE_SIZE (GET_MODE (x
)))
13763 output_operand_lossage
13764 ("invalid operand size for operand code '%c'", code
);
13769 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
13771 (0, "non-integer operand used with operand code '%c'", code
);
13775 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13776 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
13779 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
13781 switch (GET_MODE_SIZE (GET_MODE (x
)))
13784 #ifdef HAVE_AS_IX86_FILDS
13794 #ifdef HAVE_AS_IX86_FILDQ
13797 fputs ("ll", file
);
13805 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
13807 /* 387 opcodes don't get size suffixes
13808 if the operands are registers. */
13809 if (STACK_REG_P (x
))
13812 switch (GET_MODE_SIZE (GET_MODE (x
)))
13833 output_operand_lossage
13834 ("invalid operand type used with operand code '%c'", code
);
13838 output_operand_lossage
13839 ("invalid operand size for operand code '%c'", code
);
13856 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
13858 ix86_print_operand (file
, x
, 0);
13859 fputs (", ", file
);
13864 /* Little bit of braindamage here. The SSE compare instructions
13865 does use completely different names for the comparisons that the
13866 fp conditional moves. */
13869 switch (GET_CODE (x
))
13872 fputs ("eq", file
);
13875 fputs ("eq_us", file
);
13878 fputs ("lt", file
);
13881 fputs ("nge", file
);
13884 fputs ("le", file
);
13887 fputs ("ngt", file
);
13890 fputs ("unord", file
);
13893 fputs ("neq", file
);
13896 fputs ("neq_oq", file
);
13899 fputs ("ge", file
);
13902 fputs ("nlt", file
);
13905 fputs ("gt", file
);
13908 fputs ("nle", file
);
13911 fputs ("ord", file
);
13914 output_operand_lossage ("operand is not a condition code, "
13915 "invalid operand code 'D'");
13921 switch (GET_CODE (x
))
13925 fputs ("eq", file
);
13929 fputs ("lt", file
);
13933 fputs ("le", file
);
13936 fputs ("unord", file
);
13940 fputs ("neq", file
);
13944 fputs ("nlt", file
);
13948 fputs ("nle", file
);
13951 fputs ("ord", file
);
13954 output_operand_lossage ("operand is not a condition code, "
13955 "invalid operand code 'D'");
13961 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13962 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13964 switch (GET_MODE (x
))
13966 case HImode
: putc ('w', file
); break;
13968 case SFmode
: putc ('l', file
); break;
13970 case DFmode
: putc ('q', file
); break;
13971 default: gcc_unreachable ();
13978 if (!COMPARISON_P (x
))
13980 output_operand_lossage ("operand is neither a constant nor a "
13981 "condition code, invalid operand code "
13985 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
13988 if (!COMPARISON_P (x
))
13990 output_operand_lossage ("operand is neither a constant nor a "
13991 "condition code, invalid operand code "
13995 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13996 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13999 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
14002 /* Like above, but reverse condition */
14004 /* Check to see if argument to %c is really a constant
14005 and not a condition code which needs to be reversed. */
14006 if (!COMPARISON_P (x
))
14008 output_operand_lossage ("operand is neither a constant nor a "
14009 "condition code, invalid operand "
14013 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
14016 if (!COMPARISON_P (x
))
14018 output_operand_lossage ("operand is neither a constant nor a "
14019 "condition code, invalid operand "
14023 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14024 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14027 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
14031 /* It doesn't actually matter what mode we use here, as we're
14032 only going to use this for printing. */
14033 x
= adjust_address_nv (x
, DImode
, 8);
14041 || optimize_function_for_size_p (cfun
) || !TARGET_BRANCH_PREDICTION_HINTS
)
14044 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
14047 int pred_val
= INTVAL (XEXP (x
, 0));
14049 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
14050 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
14052 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
14053 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
14055 /* Emit hints only in the case default branch prediction
14056 heuristics would fail. */
14057 if (taken
!= cputaken
)
14059 /* We use 3e (DS) prefix for taken branches and
14060 2e (CS) prefix for not taken branches. */
14062 fputs ("ds ; ", file
);
14064 fputs ("cs ; ", file
);
14072 switch (GET_CODE (x
))
14075 fputs ("neq", file
);
14078 fputs ("eq", file
);
14082 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14086 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14090 fputs ("le", file
);
14094 fputs ("lt", file
);
14097 fputs ("unord", file
);
14100 fputs ("ord", file
);
14103 fputs ("ueq", file
);
14106 fputs ("nlt", file
);
14109 fputs ("nle", file
);
14112 fputs ("ule", file
);
14115 fputs ("ult", file
);
14118 fputs ("une", file
);
14121 output_operand_lossage ("operand is not a condition code, "
14122 "invalid operand code 'Y'");
14128 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14134 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14137 /* The kernel uses a different segment register for performance
14138 reasons; a system call would not have to trash the userspace
14139 segment register, which would be expensive. */
14140 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
14141 fputs ("fs", file
);
14143 fputs ("gs", file
);
14147 output_operand_lossage ("invalid operand code '%c'", code
);
14152 print_reg (x
, code
, file
);
14154 else if (MEM_P (x
))
14156 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14157 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
14158 && GET_MODE (x
) != BLKmode
)
14161 switch (GET_MODE_SIZE (GET_MODE (x
)))
14163 case 1: size
= "BYTE"; break;
14164 case 2: size
= "WORD"; break;
14165 case 4: size
= "DWORD"; break;
14166 case 8: size
= "QWORD"; break;
14167 case 12: size
= "TBYTE"; break;
14169 if (GET_MODE (x
) == XFmode
)
14174 case 32: size
= "YMMWORD"; break;
14176 gcc_unreachable ();
14179 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14182 else if (code
== 'w')
14184 else if (code
== 'k')
14187 fputs (size
, file
);
14188 fputs (" PTR ", file
);
14192 /* Avoid (%rip) for call operands. */
14193 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
14194 && !CONST_INT_P (x
))
14195 output_addr_const (file
, x
);
14196 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
14197 output_operand_lossage ("invalid constraints for operand");
14199 output_address (x
);
14202 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
14207 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14208 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
14210 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14212 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14214 fprintf (file
, "0x%08llx", (unsigned long long) (int) l
);
14216 fprintf (file
, "0x%08x", (unsigned int) l
);
14219 /* These float cases don't actually occur as immediate operands. */
14220 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
14224 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
14225 fputs (dstr
, file
);
14228 else if (GET_CODE (x
) == CONST_DOUBLE
14229 && GET_MODE (x
) == XFmode
)
14233 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
14234 fputs (dstr
, file
);
14239 /* We have patterns that allow zero sets of memory, for instance.
14240 In 64-bit mode, we should probably support all 8-byte vectors,
14241 since we can in fact encode that into an immediate. */
14242 if (GET_CODE (x
) == CONST_VECTOR
)
14244 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
14250 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
14252 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14255 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
14256 || GET_CODE (x
) == LABEL_REF
)
14258 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14261 fputs ("OFFSET FLAT:", file
);
14264 if (CONST_INT_P (x
))
14265 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
14266 else if (flag_pic
|| MACHOPIC_INDIRECT
)
14267 output_pic_addr_const (file
, x
, code
);
14269 output_addr_const (file
, x
);
14274 ix86_print_operand_punct_valid_p (unsigned char code
)
14276 return (code
== '@' || code
== '*' || code
== '+'
14277 || code
== '&' || code
== ';');
14280 /* Print a memory operand whose address is ADDR. */
14283 ix86_print_operand_address (FILE *file
, rtx addr
)
14285 struct ix86_address parts
;
14286 rtx base
, index
, disp
;
14288 int ok
= ix86_decompose_address (addr
, &parts
);
14293 index
= parts
.index
;
14295 scale
= parts
.scale
;
14303 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14305 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
14308 gcc_unreachable ();
14311 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14312 if (TARGET_64BIT
&& !base
&& !index
)
14316 if (GET_CODE (disp
) == CONST
14317 && GET_CODE (XEXP (disp
, 0)) == PLUS
14318 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14319 symbol
= XEXP (XEXP (disp
, 0), 0);
14321 if (GET_CODE (symbol
) == LABEL_REF
14322 || (GET_CODE (symbol
) == SYMBOL_REF
14323 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
14326 if (!base
&& !index
)
14328 /* Displacement only requires special attention. */
14330 if (CONST_INT_P (disp
))
14332 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
14333 fputs ("ds:", file
);
14334 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
14337 output_pic_addr_const (file
, disp
, 0);
14339 output_addr_const (file
, disp
);
14343 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14348 output_pic_addr_const (file
, disp
, 0);
14349 else if (GET_CODE (disp
) == LABEL_REF
)
14350 output_asm_label (disp
);
14352 output_addr_const (file
, disp
);
14357 print_reg (base
, 0, file
);
14361 print_reg (index
, 0, file
);
14363 fprintf (file
, ",%d", scale
);
14369 rtx offset
= NULL_RTX
;
14373 /* Pull out the offset of a symbol; print any symbol itself. */
14374 if (GET_CODE (disp
) == CONST
14375 && GET_CODE (XEXP (disp
, 0)) == PLUS
14376 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14378 offset
= XEXP (XEXP (disp
, 0), 1);
14379 disp
= gen_rtx_CONST (VOIDmode
,
14380 XEXP (XEXP (disp
, 0), 0));
14384 output_pic_addr_const (file
, disp
, 0);
14385 else if (GET_CODE (disp
) == LABEL_REF
)
14386 output_asm_label (disp
);
14387 else if (CONST_INT_P (disp
))
14390 output_addr_const (file
, disp
);
14396 print_reg (base
, 0, file
);
14399 if (INTVAL (offset
) >= 0)
14401 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14405 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14412 print_reg (index
, 0, file
);
14414 fprintf (file
, "*%d", scale
);
14421 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14424 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
14428 if (GET_CODE (x
) != UNSPEC
)
14431 op
= XVECEXP (x
, 0, 0);
14432 switch (XINT (x
, 1))
14434 case UNSPEC_GOTTPOFF
:
14435 output_addr_const (file
, op
);
14436 /* FIXME: This might be @TPOFF in Sun ld. */
14437 fputs ("@gottpoff", file
);
14440 output_addr_const (file
, op
);
14441 fputs ("@tpoff", file
);
14443 case UNSPEC_NTPOFF
:
14444 output_addr_const (file
, op
);
14446 fputs ("@tpoff", file
);
14448 fputs ("@ntpoff", file
);
14450 case UNSPEC_DTPOFF
:
14451 output_addr_const (file
, op
);
14452 fputs ("@dtpoff", file
);
14454 case UNSPEC_GOTNTPOFF
:
14455 output_addr_const (file
, op
);
14457 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
14458 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
14460 fputs ("@gotntpoff", file
);
14462 case UNSPEC_INDNTPOFF
:
14463 output_addr_const (file
, op
);
14464 fputs ("@indntpoff", file
);
14467 case UNSPEC_MACHOPIC_OFFSET
:
14468 output_addr_const (file
, op
);
14470 machopic_output_function_base_name (file
);
14474 case UNSPEC_STACK_CHECK
:
14478 gcc_assert (flag_split_stack
);
14480 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14481 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
14483 gcc_unreachable ();
14486 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
14497 /* Split one or more double-mode RTL references into pairs of half-mode
14498 references. The RTL can be REG, offsettable MEM, integer constant, or
14499 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14500 split and "num" is its length. lo_half and hi_half are output arrays
14501 that parallel "operands". */
14504 split_double_mode (enum machine_mode mode
, rtx operands
[],
14505 int num
, rtx lo_half
[], rtx hi_half
[])
14507 enum machine_mode half_mode
;
14513 half_mode
= DImode
;
14516 half_mode
= SImode
;
14519 gcc_unreachable ();
14522 byte
= GET_MODE_SIZE (half_mode
);
14526 rtx op
= operands
[num
];
14528 /* simplify_subreg refuse to split volatile memory addresses,
14529 but we still have to handle it. */
14532 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
14533 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
14537 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
14538 GET_MODE (op
) == VOIDmode
14539 ? mode
: GET_MODE (op
), 0);
14540 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
14541 GET_MODE (op
) == VOIDmode
14542 ? mode
: GET_MODE (op
), byte
);
14547 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14548 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14549 is the expression of the binary operation. The output may either be
14550 emitted here, or returned to the caller, like all output_* functions.
14552 There is no guarantee that the operands are the same mode, as they
14553 might be within FLOAT or FLOAT_EXTEND expressions. */
14555 #ifndef SYSV386_COMPAT
14556 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14557 wants to fix the assemblers because that causes incompatibility
14558 with gcc. No-one wants to fix gcc because that causes
14559 incompatibility with assemblers... You can use the option of
14560 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14561 #define SYSV386_COMPAT 1
14565 output_387_binary_op (rtx insn
, rtx
*operands
)
14567 static char buf
[40];
14570 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
14572 #ifdef ENABLE_CHECKING
14573 /* Even if we do not want to check the inputs, this documents input
14574 constraints. Which helps in understanding the following code. */
14575 if (STACK_REG_P (operands
[0])
14576 && ((REG_P (operands
[1])
14577 && REGNO (operands
[0]) == REGNO (operands
[1])
14578 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
14579 || (REG_P (operands
[2])
14580 && REGNO (operands
[0]) == REGNO (operands
[2])
14581 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
14582 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
14585 gcc_assert (is_sse
);
14588 switch (GET_CODE (operands
[3]))
14591 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14592 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14600 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14601 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14609 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14610 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14618 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14619 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14627 gcc_unreachable ();
14634 strcpy (buf
, ssep
);
14635 if (GET_MODE (operands
[0]) == SFmode
)
14636 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
14638 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
14642 strcpy (buf
, ssep
+ 1);
14643 if (GET_MODE (operands
[0]) == SFmode
)
14644 strcat (buf
, "ss\t{%2, %0|%0, %2}");
14646 strcat (buf
, "sd\t{%2, %0|%0, %2}");
14652 switch (GET_CODE (operands
[3]))
14656 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
14658 rtx temp
= operands
[2];
14659 operands
[2] = operands
[1];
14660 operands
[1] = temp
;
14663 /* know operands[0] == operands[1]. */
14665 if (MEM_P (operands
[2]))
14671 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
14673 if (STACK_TOP_P (operands
[0]))
14674 /* How is it that we are storing to a dead operand[2]?
14675 Well, presumably operands[1] is dead too. We can't
14676 store the result to st(0) as st(0) gets popped on this
14677 instruction. Instead store to operands[2] (which I
14678 think has to be st(1)). st(1) will be popped later.
14679 gcc <= 2.8.1 didn't have this check and generated
14680 assembly code that the Unixware assembler rejected. */
14681 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14683 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14687 if (STACK_TOP_P (operands
[0]))
14688 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14690 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14695 if (MEM_P (operands
[1]))
14701 if (MEM_P (operands
[2]))
14707 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
14710 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14711 derived assemblers, confusingly reverse the direction of
14712 the operation for fsub{r} and fdiv{r} when the
14713 destination register is not st(0). The Intel assembler
14714 doesn't have this brain damage. Read !SYSV386_COMPAT to
14715 figure out what the hardware really does. */
14716 if (STACK_TOP_P (operands
[0]))
14717 p
= "{p\t%0, %2|rp\t%2, %0}";
14719 p
= "{rp\t%2, %0|p\t%0, %2}";
14721 if (STACK_TOP_P (operands
[0]))
14722 /* As above for fmul/fadd, we can't store to st(0). */
14723 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14725 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14730 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
14733 if (STACK_TOP_P (operands
[0]))
14734 p
= "{rp\t%0, %1|p\t%1, %0}";
14736 p
= "{p\t%1, %0|rp\t%0, %1}";
14738 if (STACK_TOP_P (operands
[0]))
14739 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14741 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14746 if (STACK_TOP_P (operands
[0]))
14748 if (STACK_TOP_P (operands
[1]))
14749 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14751 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14754 else if (STACK_TOP_P (operands
[1]))
14757 p
= "{\t%1, %0|r\t%0, %1}";
14759 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14765 p
= "{r\t%2, %0|\t%0, %2}";
14767 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14773 gcc_unreachable ();
14780 /* Return needed mode for entity in optimize_mode_switching pass. */
14783 ix86_mode_needed (int entity
, rtx insn
)
14785 enum attr_i387_cw mode
;
14787 /* The mode UNINITIALIZED is used to store control word after a
14788 function call or ASM pattern. The mode ANY specify that function
14789 has no requirements on the control word and make no changes in the
14790 bits we are interested in. */
14793 || (NONJUMP_INSN_P (insn
)
14794 && (asm_noperands (PATTERN (insn
)) >= 0
14795 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
14796 return I387_CW_UNINITIALIZED
;
14798 if (recog_memoized (insn
) < 0)
14799 return I387_CW_ANY
;
14801 mode
= get_attr_i387_cw (insn
);
14806 if (mode
== I387_CW_TRUNC
)
14811 if (mode
== I387_CW_FLOOR
)
14816 if (mode
== I387_CW_CEIL
)
14821 if (mode
== I387_CW_MASK_PM
)
14826 gcc_unreachable ();
14829 return I387_CW_ANY
;
14832 /* Output code to initialize control word copies used by trunc?f?i and
14833 rounding patterns. CURRENT_MODE is set to current control word,
14834 while NEW_MODE is set to new control word. */
14837 emit_i387_cw_initialization (int mode
)
14839 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
14842 enum ix86_stack_slot slot
;
14844 rtx reg
= gen_reg_rtx (HImode
);
14846 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
14847 emit_move_insn (reg
, copy_rtx (stored_mode
));
14849 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
14850 || optimize_function_for_size_p (cfun
))
14854 case I387_CW_TRUNC
:
14855 /* round toward zero (truncate) */
14856 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
14857 slot
= SLOT_CW_TRUNC
;
14860 case I387_CW_FLOOR
:
14861 /* round down toward -oo */
14862 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
14863 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
14864 slot
= SLOT_CW_FLOOR
;
14868 /* round up toward +oo */
14869 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
14870 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
14871 slot
= SLOT_CW_CEIL
;
14874 case I387_CW_MASK_PM
:
14875 /* mask precision exception for nearbyint() */
14876 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
14877 slot
= SLOT_CW_MASK_PM
;
14881 gcc_unreachable ();
14888 case I387_CW_TRUNC
:
14889 /* round toward zero (truncate) */
14890 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
14891 slot
= SLOT_CW_TRUNC
;
14894 case I387_CW_FLOOR
:
14895 /* round down toward -oo */
14896 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
14897 slot
= SLOT_CW_FLOOR
;
14901 /* round up toward +oo */
14902 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
14903 slot
= SLOT_CW_CEIL
;
14906 case I387_CW_MASK_PM
:
14907 /* mask precision exception for nearbyint() */
14908 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
14909 slot
= SLOT_CW_MASK_PM
;
14913 gcc_unreachable ();
14917 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
14919 new_mode
= assign_386_stack_local (HImode
, slot
);
14920 emit_move_insn (new_mode
, reg
);
14923 /* Output code for INSN to convert a float to a signed int. OPERANDS
14924 are the insn operands. The output may be [HSD]Imode and the input
14925 operand may be [SDX]Fmode. */
14928 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
14930 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
14931 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
14932 int round_mode
= get_attr_i387_cw (insn
);
14934 /* Jump through a hoop or two for DImode, since the hardware has no
14935 non-popping instruction. We used to do this a different way, but
14936 that was somewhat fragile and broke with post-reload splitters. */
14937 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
14938 output_asm_insn ("fld\t%y1", operands
);
14940 gcc_assert (STACK_TOP_P (operands
[1]));
14941 gcc_assert (MEM_P (operands
[0]));
14942 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
14945 output_asm_insn ("fisttp%Z0\t%0", operands
);
14948 if (round_mode
!= I387_CW_ANY
)
14949 output_asm_insn ("fldcw\t%3", operands
);
14950 if (stack_top_dies
|| dimode_p
)
14951 output_asm_insn ("fistp%Z0\t%0", operands
);
14953 output_asm_insn ("fist%Z0\t%0", operands
);
14954 if (round_mode
!= I387_CW_ANY
)
14955 output_asm_insn ("fldcw\t%2", operands
);
14961 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14962 have the values zero or one, indicates the ffreep insn's operand
14963 from the OPERANDS array. */
14965 static const char *
14966 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
14968 if (TARGET_USE_FFREEP
)
14969 #ifdef HAVE_AS_IX86_FFREEP
14970 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
14973 static char retval
[32];
14974 int regno
= REGNO (operands
[opno
]);
14976 gcc_assert (FP_REGNO_P (regno
));
14978 regno
-= FIRST_STACK_REG
;
14980 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
14985 return opno
? "fstp\t%y1" : "fstp\t%y0";
14989 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14990 should be used. UNORDERED_P is true when fucom should be used. */
14993 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
14995 int stack_top_dies
;
14996 rtx cmp_op0
, cmp_op1
;
14997 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
15001 cmp_op0
= operands
[0];
15002 cmp_op1
= operands
[1];
15006 cmp_op0
= operands
[1];
15007 cmp_op1
= operands
[2];
15012 static const char ucomiss
[] = "vucomiss\t{%1, %0|%0, %1}";
15013 static const char ucomisd
[] = "vucomisd\t{%1, %0|%0, %1}";
15014 static const char comiss
[] = "vcomiss\t{%1, %0|%0, %1}";
15015 static const char comisd
[] = "vcomisd\t{%1, %0|%0, %1}";
15017 if (GET_MODE (operands
[0]) == SFmode
)
15019 return &ucomiss
[TARGET_AVX
? 0 : 1];
15021 return &comiss
[TARGET_AVX
? 0 : 1];
15024 return &ucomisd
[TARGET_AVX
? 0 : 1];
15026 return &comisd
[TARGET_AVX
? 0 : 1];
15029 gcc_assert (STACK_TOP_P (cmp_op0
));
15031 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15033 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
15035 if (stack_top_dies
)
15037 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
15038 return output_387_ffreep (operands
, 1);
15041 return "ftst\n\tfnstsw\t%0";
15044 if (STACK_REG_P (cmp_op1
)
15046 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
15047 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
15049 /* If both the top of the 387 stack dies, and the other operand
15050 is also a stack register that dies, then this must be a
15051 `fcompp' float compare */
15055 /* There is no double popping fcomi variant. Fortunately,
15056 eflags is immune from the fstp's cc clobbering. */
15058 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
15060 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
15061 return output_387_ffreep (operands
, 0);
15066 return "fucompp\n\tfnstsw\t%0";
15068 return "fcompp\n\tfnstsw\t%0";
15073 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15075 static const char * const alt
[16] =
15077 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15078 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15079 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15080 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15082 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15083 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15087 "fcomi\t{%y1, %0|%0, %y1}",
15088 "fcomip\t{%y1, %0|%0, %y1}",
15089 "fucomi\t{%y1, %0|%0, %y1}",
15090 "fucomip\t{%y1, %0|%0, %y1}",
15101 mask
= eflags_p
<< 3;
15102 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
15103 mask
|= unordered_p
<< 1;
15104 mask
|= stack_top_dies
;
15106 gcc_assert (mask
< 16);
15115 ix86_output_addr_vec_elt (FILE *file
, int value
)
15117 const char *directive
= ASM_LONG
;
15121 directive
= ASM_QUAD
;
15123 gcc_assert (!TARGET_64BIT
);
15126 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
15130 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
15132 const char *directive
= ASM_LONG
;
15135 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
15136 directive
= ASM_QUAD
;
15138 gcc_assert (!TARGET_64BIT
);
15140 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15141 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
15142 fprintf (file
, "%s%s%d-%s%d\n",
15143 directive
, LPREFIX
, value
, LPREFIX
, rel
);
15144 else if (HAVE_AS_GOTOFF_IN_DATA
)
15145 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
15147 else if (TARGET_MACHO
)
15149 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
15150 machopic_output_function_base_name (file
);
15155 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
15156 GOT_SYMBOL_NAME
, LPREFIX
, value
);
15159 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15163 ix86_expand_clear (rtx dest
)
15167 /* We play register width games, which are only valid after reload. */
15168 gcc_assert (reload_completed
);
15170 /* Avoid HImode and its attendant prefix byte. */
15171 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
15172 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
15173 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
15175 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15176 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
15178 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
15179 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
15185 /* X is an unchanging MEM. If it is a constant pool reference, return
15186 the constant pool rtx, else NULL. */
15189 maybe_get_pool_constant (rtx x
)
15191 x
= ix86_delegitimize_address (XEXP (x
, 0));
15193 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
15194 return get_pool_constant (x
);
15200 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
15203 enum tls_model model
;
15208 if (GET_CODE (op1
) == SYMBOL_REF
)
15210 model
= SYMBOL_REF_TLS_MODEL (op1
);
15213 op1
= legitimize_tls_address (op1
, model
, true);
15214 op1
= force_operand (op1
, op0
);
15218 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15219 && SYMBOL_REF_DLLIMPORT_P (op1
))
15220 op1
= legitimize_dllimport_symbol (op1
, false);
15222 else if (GET_CODE (op1
) == CONST
15223 && GET_CODE (XEXP (op1
, 0)) == PLUS
15224 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
15226 rtx addend
= XEXP (XEXP (op1
, 0), 1);
15227 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
15230 model
= SYMBOL_REF_TLS_MODEL (symbol
);
15232 tmp
= legitimize_tls_address (symbol
, model
, true);
15233 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15234 && SYMBOL_REF_DLLIMPORT_P (symbol
))
15235 tmp
= legitimize_dllimport_symbol (symbol
, true);
15239 tmp
= force_operand (tmp
, NULL
);
15240 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
15241 op0
, 1, OPTAB_DIRECT
);
15247 if ((flag_pic
|| MACHOPIC_INDIRECT
)
15248 && mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
15250 if (TARGET_MACHO
&& !TARGET_64BIT
)
15253 /* dynamic-no-pic */
15254 if (MACHOPIC_INDIRECT
)
15256 rtx temp
= ((reload_in_progress
15257 || ((op0
&& REG_P (op0
))
15259 ? op0
: gen_reg_rtx (Pmode
));
15260 op1
= machopic_indirect_data_reference (op1
, temp
);
15262 op1
= machopic_legitimize_pic_address (op1
, mode
,
15263 temp
== op1
? 0 : temp
);
15265 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
15267 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
15271 if (GET_CODE (op0
) == MEM
)
15272 op1
= force_reg (Pmode
, op1
);
15276 if (GET_CODE (temp
) != REG
)
15277 temp
= gen_reg_rtx (Pmode
);
15278 temp
= legitimize_pic_address (op1
, temp
);
15283 /* dynamic-no-pic */
15289 op1
= force_reg (Pmode
, op1
);
15290 else if (!TARGET_64BIT
|| !x86_64_movabs_operand (op1
, Pmode
))
15292 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
15293 op1
= legitimize_pic_address (op1
, reg
);
15302 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
15303 || !push_operand (op0
, mode
))
15305 op1
= force_reg (mode
, op1
);
15307 if (push_operand (op0
, mode
)
15308 && ! general_no_elim_operand (op1
, mode
))
15309 op1
= copy_to_mode_reg (mode
, op1
);
15311 /* Force large constants in 64bit compilation into register
15312 to get them CSEed. */
15313 if (can_create_pseudo_p ()
15314 && (mode
== DImode
) && TARGET_64BIT
15315 && immediate_operand (op1
, mode
)
15316 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
15317 && !register_operand (op0
, mode
)
15319 op1
= copy_to_mode_reg (mode
, op1
);
15321 if (can_create_pseudo_p ()
15322 && FLOAT_MODE_P (mode
)
15323 && GET_CODE (op1
) == CONST_DOUBLE
)
15325 /* If we are loading a floating point constant to a register,
15326 force the value to memory now, since we'll get better code
15327 out the back end. */
15329 op1
= validize_mem (force_const_mem (mode
, op1
));
15330 if (!register_operand (op0
, mode
))
15332 rtx temp
= gen_reg_rtx (mode
);
15333 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
15334 emit_move_insn (op0
, temp
);
15340 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
15344 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
15346 rtx op0
= operands
[0], op1
= operands
[1];
15347 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
15349 /* Force constants other than zero into memory. We do not know how
15350 the instructions used to build constants modify the upper 64 bits
15351 of the register, once we have that information we may be able
15352 to handle some of them more efficiently. */
15353 if (can_create_pseudo_p ()
15354 && register_operand (op0
, mode
)
15355 && (CONSTANT_P (op1
)
15356 || (GET_CODE (op1
) == SUBREG
15357 && CONSTANT_P (SUBREG_REG (op1
))))
15358 && !standard_sse_constant_p (op1
))
15359 op1
= validize_mem (force_const_mem (mode
, op1
));
15361 /* We need to check memory alignment for SSE mode since attribute
15362 can make operands unaligned. */
15363 if (can_create_pseudo_p ()
15364 && SSE_REG_MODE_P (mode
)
15365 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
15366 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
15370 /* ix86_expand_vector_move_misalign() does not like constants ... */
15371 if (CONSTANT_P (op1
)
15372 || (GET_CODE (op1
) == SUBREG
15373 && CONSTANT_P (SUBREG_REG (op1
))))
15374 op1
= validize_mem (force_const_mem (mode
, op1
));
15376 /* ... nor both arguments in memory. */
15377 if (!register_operand (op0
, mode
)
15378 && !register_operand (op1
, mode
))
15379 op1
= force_reg (mode
, op1
);
15381 tmp
[0] = op0
; tmp
[1] = op1
;
15382 ix86_expand_vector_move_misalign (mode
, tmp
);
15386 /* Make operand1 a register if it isn't already. */
15387 if (can_create_pseudo_p ()
15388 && !register_operand (op0
, mode
)
15389 && !register_operand (op1
, mode
))
15391 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
15395 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
15398 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15399 straight to ix86_expand_vector_move. */
15400 /* Code generation for scalar reg-reg moves of single and double precision data:
15401 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15405 if (x86_sse_partial_reg_dependency == true)
15410 Code generation for scalar loads of double precision data:
15411 if (x86_sse_split_regs == true)
15412 movlpd mem, reg (gas syntax)
15416 Code generation for unaligned packed loads of single precision data
15417 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15418 if (x86_sse_unaligned_move_optimal)
15421 if (x86_sse_partial_reg_dependency == true)
15433 Code generation for unaligned packed loads of double precision data
15434 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15435 if (x86_sse_unaligned_move_optimal)
15438 if (x86_sse_split_regs == true)
15451 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
15460 switch (GET_MODE_CLASS (mode
))
15462 case MODE_VECTOR_INT
:
15464 switch (GET_MODE_SIZE (mode
))
15467 /* If we're optimizing for size, movups is the smallest. */
15468 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
15470 op0
= gen_lowpart (V4SFmode
, op0
);
15471 op1
= gen_lowpart (V4SFmode
, op1
);
15472 emit_insn (gen_avx_movups (op0
, op1
));
15475 op0
= gen_lowpart (V16QImode
, op0
);
15476 op1
= gen_lowpart (V16QImode
, op1
);
15477 emit_insn (gen_avx_movdqu (op0
, op1
));
15480 op0
= gen_lowpart (V32QImode
, op0
);
15481 op1
= gen_lowpart (V32QImode
, op1
);
15482 emit_insn (gen_avx_movdqu256 (op0
, op1
));
15485 gcc_unreachable ();
15488 case MODE_VECTOR_FLOAT
:
15489 op0
= gen_lowpart (mode
, op0
);
15490 op1
= gen_lowpart (mode
, op1
);
15495 emit_insn (gen_avx_movups (op0
, op1
));
15498 emit_insn (gen_avx_movups256 (op0
, op1
));
15501 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
15503 op0
= gen_lowpart (V4SFmode
, op0
);
15504 op1
= gen_lowpart (V4SFmode
, op1
);
15505 emit_insn (gen_avx_movups (op0
, op1
));
15508 emit_insn (gen_avx_movupd (op0
, op1
));
15511 emit_insn (gen_avx_movupd256 (op0
, op1
));
15514 gcc_unreachable ();
15519 gcc_unreachable ();
15527 /* If we're optimizing for size, movups is the smallest. */
15528 if (optimize_insn_for_size_p ()
15529 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
15531 op0
= gen_lowpart (V4SFmode
, op0
);
15532 op1
= gen_lowpart (V4SFmode
, op1
);
15533 emit_insn (gen_sse_movups (op0
, op1
));
15537 /* ??? If we have typed data, then it would appear that using
15538 movdqu is the only way to get unaligned data loaded with
15540 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
15542 op0
= gen_lowpart (V16QImode
, op0
);
15543 op1
= gen_lowpart (V16QImode
, op1
);
15544 emit_insn (gen_sse2_movdqu (op0
, op1
));
15548 if (TARGET_SSE2
&& mode
== V2DFmode
)
15552 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
)
15554 op0
= gen_lowpart (V2DFmode
, op0
);
15555 op1
= gen_lowpart (V2DFmode
, op1
);
15556 emit_insn (gen_sse2_movupd (op0
, op1
));
15560 /* When SSE registers are split into halves, we can avoid
15561 writing to the top half twice. */
15562 if (TARGET_SSE_SPLIT_REGS
)
15564 emit_clobber (op0
);
15569 /* ??? Not sure about the best option for the Intel chips.
15570 The following would seem to satisfy; the register is
15571 entirely cleared, breaking the dependency chain. We
15572 then store to the upper half, with a dependency depth
15573 of one. A rumor has it that Intel recommends two movsd
15574 followed by an unpacklpd, but this is unconfirmed. And
15575 given that the dependency depth of the unpacklpd would
15576 still be one, I'm not sure why this would be better. */
15577 zero
= CONST0_RTX (V2DFmode
);
15580 m
= adjust_address (op1
, DFmode
, 0);
15581 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
15582 m
= adjust_address (op1
, DFmode
, 8);
15583 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
15587 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
)
15589 op0
= gen_lowpart (V4SFmode
, op0
);
15590 op1
= gen_lowpart (V4SFmode
, op1
);
15591 emit_insn (gen_sse_movups (op0
, op1
));
15595 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
15596 emit_move_insn (op0
, CONST0_RTX (mode
));
15598 emit_clobber (op0
);
15600 if (mode
!= V4SFmode
)
15601 op0
= gen_lowpart (V4SFmode
, op0
);
15602 m
= adjust_address (op1
, V2SFmode
, 0);
15603 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
15604 m
= adjust_address (op1
, V2SFmode
, 8);
15605 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
15608 else if (MEM_P (op0
))
15610 /* If we're optimizing for size, movups is the smallest. */
15611 if (optimize_insn_for_size_p ()
15612 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
15614 op0
= gen_lowpart (V4SFmode
, op0
);
15615 op1
= gen_lowpart (V4SFmode
, op1
);
15616 emit_insn (gen_sse_movups (op0
, op1
));
15620 /* ??? Similar to above, only less clear because of quote
15621 typeless stores unquote. */
15622 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
15623 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
15625 op0
= gen_lowpart (V16QImode
, op0
);
15626 op1
= gen_lowpart (V16QImode
, op1
);
15627 emit_insn (gen_sse2_movdqu (op0
, op1
));
15631 if (TARGET_SSE2
&& mode
== V2DFmode
)
15633 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL
)
15635 op0
= gen_lowpart (V2DFmode
, op0
);
15636 op1
= gen_lowpart (V2DFmode
, op1
);
15637 emit_insn (gen_sse2_movupd (op0
, op1
));
15641 m
= adjust_address (op0
, DFmode
, 0);
15642 emit_insn (gen_sse2_storelpd (m
, op1
));
15643 m
= adjust_address (op0
, DFmode
, 8);
15644 emit_insn (gen_sse2_storehpd (m
, op1
));
15649 if (mode
!= V4SFmode
)
15650 op1
= gen_lowpart (V4SFmode
, op1
);
15652 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL
)
15654 op0
= gen_lowpart (V4SFmode
, op0
);
15655 emit_insn (gen_sse_movups (op0
, op1
));
15659 m
= adjust_address (op0
, V2SFmode
, 0);
15660 emit_insn (gen_sse_storelps (m
, op1
));
15661 m
= adjust_address (op0
, V2SFmode
, 8);
15662 emit_insn (gen_sse_storehps (m
, op1
));
15667 gcc_unreachable ();
/* NOTE(review): this extraction embeds original source line numbers and they
   are non-contiguous (e.g. 15683 -> 15685), so intervening lines (braces,
   declarations) were dropped; verify against upstream GCC i386.c.  */
15670 /* Expand a push in MODE. This is some mode for which we do not support
15671 proper push instructions, at least from the registers that we expect
15672 the value to live in. */
15675 ix86_expand_push (enum machine_mode mode
, rtx x
)
/* Decrement the stack pointer by the size of MODE; expand_simple_binop may
   update stack_pointer_rtx in place, otherwise copy the result back.  */
15679 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
15680 GEN_INT (-GET_MODE_SIZE (mode
)),
15681 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
15682 if (tmp
!= stack_pointer_rtx
)
15683 emit_move_insn (stack_pointer_rtx
, tmp
);
/* Store X through a MEM at the new top of stack.  */
15685 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
15687 /* When we push an operand onto stack, it has to be aligned at least
15688 at the function argument boundary. However since we don't have
15689 the argument type, we can't determine the actual argument
/* (The comment above is cut mid-sentence by the extraction gap 15689->15691.)  */
15691 emit_move_insn (tmp
, x
);
/* NOTE(review): extraction gap -- the return statements that followed each
   condition (gaps 15706->15709, 15710->15712, etc.) were dropped; confirm
   the true/false results against upstream i386.c.  */
15694 /* Helper function of ix86_fixup_binary_operands to canonicalize
15695 operand order. Returns true if the operands should be swapped. */
15698 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
15701 rtx dst
= operands
[0];
15702 rtx src1
= operands
[1];
15703 rtx src2
= operands
[2];
15705 /* If the operation is not commutative, we can't do anything. */
15706 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
15709 /* Highest priority is that src1 should match dst. */
15710 if (rtx_equal_p (dst
, src1
))
15712 if (rtx_equal_p (dst
, src2
))
15715 /* Next highest priority is that immediate constants come second. */
15716 if (immediate_operand (src2
, mode
))
15718 if (immediate_operand (src1
, mode
))
15721 /* Lowest priority is that memory references should come second. */
/* NOTE(review): extraction gap -- non-contiguous embedded line numbers
   (e.g. 15749->15756, 15762->15766) indicate dropped braces/else arms;
   verify against upstream i386.c.  */
15731 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15732 destination to use for the operation. If different from the true
15733 destination in operands[0], a copy operation will be required. */
15736 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
15739 rtx dst
= operands
[0];
15740 rtx src1
= operands
[1];
15741 rtx src2
= operands
[2];
15743 /* Canonicalize operand order. */
15744 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
15748 /* It is invalid to swap operands of different modes. */
15749 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
15756 /* Both source operands cannot be in memory. */
15757 if (MEM_P (src1
) && MEM_P (src2
))
15759 /* Optimization: Only read from memory once. */
15760 if (rtx_equal_p (src1
, src2
))
/* Both force_reg calls below load src2 into a register; presumably one is the
   "src1 = src2" shared-load branch and the other the general case -- the
   surrounding control flow was dropped.  TODO confirm.  */
15762 src2
= force_reg (mode
, src2
);
15766 src2
= force_reg (mode
, src2
);
15769 /* If the destination is memory, and we do not have matching source
15770 operands, do things in registers. */
15771 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
15772 dst
= gen_reg_rtx (mode
);
15774 /* Source 1 cannot be a constant. */
15775 if (CONSTANT_P (src1
))
15776 src1
= force_reg (mode
, src1
);
15778 /* Source 1 cannot be a non-matching memory. */
15779 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
15780 src1
= force_reg (mode
, src1
);
/* Write the (possibly replaced) sources back for the caller.  */
15782 operands
[1] = src1
;
15783 operands
[2] = src2
;
15787 /* Similarly, but assume that the destination has already been
15788 set up properly. */
15791 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
15792 enum machine_mode mode
, rtx operands
[])
/* Delegates to ix86_fixup_binary_operands and asserts no destination copy
   became necessary (dst must still be operands[0]).  */
15794 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
15795 gcc_assert (dst
== operands
[0]);
/* NOTE(review): extraction gap -- several branch bodies between the
   embedded line numbers (15819->15822, 15826->15831) were dropped;
   verify against upstream i386.c.  */
15798 /* Attempt to expand a binary operator. Make the expansion closer to the
15799 actual machine, then just general_operand, which will allow 3 separate
15800 memory references (one output, two input) in a single insn. */
15803 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
15806 rtx src1
, src2
, dst
, op
, clob
;
15808 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
15809 src1
= operands
[1];
15810 src2
= operands
[2];
15812 /* Emit the instruction. */
15814 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
15815 if (reload_in_progress
)
15817 /* Reload doesn't know about the flags register, and doesn't know that
15818 it doesn't want to clobber it. We can only do this with PLUS. */
15819 gcc_assert (code
== PLUS
);
15822 else if (reload_completed
15824 && !rtx_equal_p (dst
, src1
))
15826 /* This is going to be an LEA; avoid splitting it later. */
/* Default path: wrap the SET in a PARALLEL with a FLAGS_REG clobber, since
   most x86 arithmetic insns clobber the condition codes.  */
15831 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
15832 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
15835 /* Fix up the destination if needed. */
15836 if (dst
!= operands
[0])
15837 emit_move_insn (operands
[0], dst
);
/* NOTE(review): extraction gap -- the early "return false" statements after
   each check (15852->15855, 15864->15867, ...) were dropped; verify against
   upstream i386.c.  */
15840 /* Return TRUE or FALSE depending on whether the binary operator meets the
15841 appropriate constraints. */
15844 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
15847 rtx dst
= operands
[0];
15848 rtx src1
= operands
[1];
15849 rtx src2
= operands
[2];
15851 /* Both source operands cannot be in memory. */
15852 if (MEM_P (src1
) && MEM_P (src2
))
15855 /* Canonicalize operand order for commutative operators. */
15856 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
15863 /* If the destination is memory, we must have a matching source operand. */
15864 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
15867 /* Source 1 cannot be a constant. */
15868 if (CONSTANT_P (src1
))
15871 /* Source 1 cannot be a non-matching memory. */
15872 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
15874 /* Support "andhi/andsi/anddi" as a zero-extending move. */
/* AND with 0xff/0xffff on a suitable mode acts as movzbl/movzwl.  */
15875 return (code
== AND
15878 || (TARGET_64BIT
&& mode
== DImode
))
15879 && CONST_INT_P (src2
)
15880 && (INTVAL (src2
) == 0xff
15881 || INTVAL (src2
) == 0xffff));
/* NOTE(review): extraction gap -- the MEM_P(dst) guard around the
   matching_memory logic (gap 15903->15906) and several braces were
   dropped; verify against upstream i386.c.  */
15887 /* Attempt to expand a unary operator. Make the expansion closer to the
15888 actual machine, then just general_operand, which will allow 2 separate
15889 memory references (one output, one input) in a single insn. */
15892 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
15895 int matching_memory
;
15896 rtx src
, dst
, op
, clob
;
15901 /* If the destination is memory, and we do not have matching source
15902 operands, do things in registers. */
15903 matching_memory
= 0;
15906 if (rtx_equal_p (dst
, src
))
15907 matching_memory
= 1;
15909 dst
= gen_reg_rtx (mode
);
15912 /* When source operand is memory, destination must match. */
15913 if (MEM_P (src
) && !matching_memory
)
15914 src
= force_reg (mode
, src
);
15916 /* Emit the instruction. */
15918 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
15919 if (reload_in_progress
|| code
== NOT
)
15921 /* Reload doesn't know about the flags register, and doesn't know that
15922 it doesn't want to clobber it. */
15923 gcc_assert (code
== NOT
);
/* Otherwise attach a FLAGS_REG clobber (e.g. NEG clobbers the flags).  */
15928 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
15929 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
15932 /* Fix up the destination if needed. */
15933 if (dst
!= operands
[0])
15934 emit_move_insn (operands
[0], dst
);
/* NOTE(review): extraction gap -- the mode switch/case labels selecting the
   SImode vs DImode generator sets (gaps around 15954/15959/15964) and the
   signed_p branch around the DIV/UDIV rtx construction were dropped;
   verify against upstream i386.c.  */
15937 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15938 divisor are within the the range [0-255]. */
15941 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
15944 rtx end_label
, qimode_label
;
15945 rtx insn
, div
, mod
;
15946 rtx scratch
, tmp0
, tmp1
, tmp2
;
15947 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
15948 rtx (*gen_zero_extend
) (rtx
, rtx
);
15949 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
/* SImode generator set (presumably the SImode case of a dropped switch).  */
15954 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
15955 gen_test_ccno_1
= gen_testsi_ccno_1
;
15956 gen_zero_extend
= gen_zero_extendqisi2
;
/* DImode generator set.  */
15959 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
15960 gen_test_ccno_1
= gen_testdi_ccno_1
;
15961 gen_zero_extend
= gen_zero_extendqidi2
;
15964 gcc_unreachable ();
15967 end_label
= gen_label_rtx ();
15968 qimode_label
= gen_label_rtx ();
15970 scratch
= gen_reg_rtx (mode
);
15972 /* Use 8bit unsigned divimod if dividend and divisor are within the
15973 the range [0-255]. */
/* OR dividend and divisor and test against ~0xff: if no high bits are set,
   both fit in 8 bits and the cheap path applies.  */
15974 emit_move_insn (scratch
, operands
[2]);
15975 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
15976 scratch
, 1, OPTAB_DIRECT
);
15977 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
15978 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15979 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
15980 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
15981 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
15983 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
/* 50/50 branch prediction hint for the QImode fast path.  */
15984 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
15985 JUMP_LABEL (insn
) = qimode_label
;
15987 /* Generate original signed/unsigned divimod. */
15988 div
= gen_divmod4_1 (operands
[0], operands
[1],
15989 operands
[2], operands
[3]);
15992 /* Branch to the end. */
15993 emit_jump_insn (gen_jump (end_label
));
15996 /* Generate 8bit unsigned divide. */
15997 emit_label (qimode_label
);
15998 /* Don't use operands[0] for result of 8bit divide since not all
15999 registers support QImode ZERO_EXTRACT. */
16000 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
16001 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
16002 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
16003 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
/* REG_EQUAL notes: signed DIV/MOD ...  */
16007 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
16008 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
/* ... or unsigned UDIV/UMOD (the signed_p test between these was dropped).  */
16012 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
16013 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
16016 /* Extract remainder from AH. */
16017 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
16018 if (REG_P (operands
[1]))
16019 insn
= emit_move_insn (operands
[1], tmp1
);
16022 /* Need a new scratch register since the old one has result
16024 scratch
= gen_reg_rtx (mode
);
16025 emit_move_insn (scratch
, tmp1
);
16026 insn
= emit_move_insn (operands
[1], scratch
);
16028 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
16030 /* Zero extend quotient from AL. */
16031 tmp1
= gen_lowpart (QImode
, tmp0
);
16032 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
16033 set_unique_reg_note (insn
, REG_EQUAL
, div
);
16035 emit_label (end_label
);
/* NOTE(review): extraction gap -- loop braces, the `distance' declaration
   and its increments, the single-predecessor-loop edge test, and the return
   statements were dropped (non-contiguous embedded line numbers); verify
   against upstream i386.c.  */
16038 #define LEA_SEARCH_THRESHOLD 12
16040 /* Search backward for non-agu definition of register number REGNO1
16041 or register number REGNO2 in INSN's basic block until
16042 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16043 2. Reach BB boundary, or
16044 3. Reach agu definition.
16045 Returns the distance between the non-agu definition point and INSN.
16046 If no definition point, returns -1. */
16049 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
16052 basic_block bb
= BLOCK_FOR_INSN (insn
);
16055 enum attr_type insn_type
;
/* First: walk backward within INSN's own basic block.  */
16057 if (insn
!= BB_HEAD (bb
))
16059 rtx prev
= PREV_INSN (insn
);
16060 while (prev
&& distance
< LEA_SEARCH_THRESHOLD
)
16062 if (NONDEBUG_INSN_P (prev
))
/* Scan the dataflow defs of PREV for a real (non-artificial) definition
   of either register of interest.  */
16065 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
16066 if (DF_REF_TYPE (*def_rec
) == DF_REF_REG_DEF
16067 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
16068 && (regno1
== DF_REF_REGNO (*def_rec
)
16069 || regno2
== DF_REF_REGNO (*def_rec
)))
16071 insn_type
= get_attr_type (prev
);
16072 if (insn_type
!= TYPE_LEA
)
16076 if (prev
== BB_HEAD (bb
))
16078 prev
= PREV_INSN (prev
);
/* Threshold not exhausted: presumably continue the scan across a
   simple-loop back edge (predecessor == this block) -- the edge test
   between FOR_EACH_EDGE and the assignment was dropped.  */
16082 if (distance
< LEA_SEARCH_THRESHOLD
)
16086 bool simple_loop
= false;
16088 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16091 simple_loop
= true;
16097 rtx prev
= BB_END (bb
);
16100 && distance
< LEA_SEARCH_THRESHOLD
)
16102 if (NONDEBUG_INSN_P (prev
))
16105 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
16106 if (DF_REF_TYPE (*def_rec
) == DF_REF_REG_DEF
16107 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
16108 && (regno1
== DF_REF_REGNO (*def_rec
)
16109 || regno2
== DF_REF_REGNO (*def_rec
)))
16111 insn_type
= get_attr_type (prev
);
16112 if (insn_type
!= TYPE_LEA
)
16116 prev
= PREV_INSN (prev
);
16124 /* get_attr_type may modify recog data. We want to make sure
16125 that recog data is valid for instruction INSN, on which
16126 distance_non_agu_define is called. INSN is unchanged here. */
16127 extract_insn_cached (insn
);
/* NOTE(review): extraction gap -- as with distance_non_agu_define, the
   `distance' declaration/increments, braces, successor-edge test and the
   actual return statements were dropped; verify against upstream i386.c.  */
16131 /* Return the distance between INSN and the next insn that uses
16132 register number REGNO0 in memory address. Return -1 if no such
16133 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16136 distance_agu_use (unsigned int regno0
, rtx insn
)
16138 basic_block bb
= BLOCK_FOR_INSN (insn
);
/* First: walk forward within INSN's own basic block.  */
16143 if (insn
!= BB_END (bb
))
16145 rtx next
= NEXT_INSN (insn
);
16146 while (next
&& distance
< LEA_SEARCH_THRESHOLD
)
16148 if (NONDEBUG_INSN_P (next
))
/* A memory-load or memory-store use of REGNO0 means it feeds an address
   generation unit (AGU).  */
16152 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
16153 if ((DF_REF_TYPE (*use_rec
) == DF_REF_REG_MEM_LOAD
16154 || DF_REF_TYPE (*use_rec
) == DF_REF_REG_MEM_STORE
)
16155 && regno0
== DF_REF_REGNO (*use_rec
))
16157 /* Return DISTANCE if OP0 is used in memory
16158 address in NEXT. */
16162 for (def_rec
= DF_INSN_DEFS (next
); *def_rec
; def_rec
++)
16163 if (DF_REF_TYPE (*def_rec
) == DF_REF_REG_DEF
16164 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
16165 && regno0
== DF_REF_REGNO (*def_rec
))
16167 /* Return -1 if OP0 is set in NEXT. */
16171 if (next
== BB_END (bb
))
16173 next
= NEXT_INSN (next
);
/* Threshold not exhausted: presumably continue scanning from the block
   head across a simple-loop forward edge -- the successor-edge test
   was dropped by extraction.  */
16177 if (distance
< LEA_SEARCH_THRESHOLD
)
16181 bool simple_loop
= false;
16183 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
16186 simple_loop
= true;
16192 rtx next
= BB_HEAD (bb
);
16195 && distance
< LEA_SEARCH_THRESHOLD
)
16197 if (NONDEBUG_INSN_P (next
))
16201 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
16202 if ((DF_REF_TYPE (*use_rec
) == DF_REF_REG_MEM_LOAD
16203 || DF_REF_TYPE (*use_rec
) == DF_REF_REG_MEM_STORE
)
16204 && regno0
== DF_REF_REGNO (*use_rec
))
16206 /* Return DISTANCE if OP0 is used in memory
16207 address in NEXT. */
16211 for (def_rec
= DF_INSN_DEFS (next
); *def_rec
; def_rec
++)
16212 if (DF_REF_TYPE (*def_rec
) == DF_REF_REG_DEF
16213 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
16214 && regno0
== DF_REF_REGNO (*def_rec
))
16216 /* Return -1 if OP0 is set in NEXT. */
16221 next
= NEXT_INSN (next
);
/* NOTE(review): extraction gap -- the return statements after each
   condition (16250->16253, 16260->16264, 16270->end) were dropped;
   verify against upstream i386.c.  */
16229 /* Define this macro to tune LEA priority vs ADD, it take effect when
16230 there is a dilemma of choicing LEA or ADD
16231 Negative value: ADD is more preferred than LEA
16233 Positive value: LEA is more preferred than ADD*/
16234 #define IX86_LEA_PRIORITY 2
16236 /* Return true if it is ok to optimize an ADD operation to LEA
16237 operation to avoid flag register consumation. For most processors,
16238 ADD is faster than LEA. For the processors like ATOM, if the
16239 destination register of LEA holds an actual address which will be
16240 used soon, LEA is better and otherwise ADD is better. */
16243 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
16245 unsigned int regno0
= true_regnum (operands
[0]);
16246 unsigned int regno1
= true_regnum (operands
[1]);
16247 unsigned int regno2
= true_regnum (operands
[2]);
16249 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16250 if (regno0
!= regno1
&& regno0
!= regno2
)
/* Only the AGU-stall-sensitive targets (TARGET_OPT_AGU) warrant the
   distance analysis below.  */
16253 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
16257 int dist_define
, dist_use
;
16259 /* Return false if REGNO0 isn't used in memory address. */
16260 dist_use
= distance_agu_use (regno0
, insn
);
16264 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
16265 if (dist_define
<= 0)
16268 /* If this insn has both backward non-agu dependence and forward
16269 agu dependence, the one with short distance take effect. */
16270 if ((dist_define
+ IX86_LEA_PRIORITY
) < dist_use
)
/* NOTE(review): extraction gap -- the leading comment is cut mid-sentence
   (presumably "... shift count of USE_BODY"), and the switch case labels,
   local declarations (set_dest, shift_rtx, i) and return statements were
   dropped; verify against upstream i386.c.  */
16277 /* Return true if destination reg of SET_BODY is shift count of
16281 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
16287 /* Retrieve destination of SET_BODY. */
16288 switch (GET_CODE (set_body
))
16291 set_dest
= SET_DEST (set_body
);
16292 if (!set_dest
|| !REG_P (set_dest
))
/* PARALLEL case: recurse over each element of SET_BODY.  */
16296 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
16297 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
16305 /* Retrieve shift count of USE_BODY. */
16306 switch (GET_CODE (use_body
))
16309 shift_rtx
= XEXP (use_body
, 1);
/* PARALLEL case: recurse over each element of USE_BODY.  */
16312 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
16313 if (ix86_dep_by_shift_count_body (set_body
,
16314 XVECEXP (use_body
, 0, i
)))
16322 && (GET_CODE (shift_rtx
) == ASHIFT
16323 || GET_CODE (shift_rtx
) == LSHIFTRT
16324 || GET_CODE (shift_rtx
) == ASHIFTRT
16325 || GET_CODE (shift_rtx
) == ROTATE
16326 || GET_CODE (shift_rtx
) == ROTATERT
))
16328 rtx shift_count
= XEXP (shift_rtx
, 1);
16330 /* Return true if shift count is dest of SET_BODY. */
16331 if (REG_P (shift_count
)
16332 && true_regnum (set_dest
) == true_regnum (shift_count
))
/* NOTE(review): comment cut mid-sentence by extraction (presumably
   "... shift count of USE_INSN").  Thin wrapper that applies the _body
   predicate to the two insns' PATTERNs.  */
16339 /* Return true if destination reg of SET_INSN is shift count of
16343 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
16345 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
16346 PATTERN (use_insn
));
/* NOTE(review): the return statements after the condition (gap at 16360->end)
   were dropped by extraction; verify against upstream i386.c.  */
16349 /* Return TRUE or FALSE depending on whether the unary operator meets the
16350 appropriate constraints. */
16353 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
16354 enum machine_mode mode ATTRIBUTE_UNUSED
,
16355 rtx operands
[2] ATTRIBUTE_UNUSED
)
16357 /* If one of operands is memory, source and destination must match. */
16358 if ((MEM_P (operands
[0])
16359 || MEM_P (operands
[1]))
16360 && ! rtx_equal_p (operands
[0], operands
[1]))
/* NOTE(review): final return (register-operand case) dropped by extraction.  */
16365 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16366 are ok, keeping in mind the possible movddup alternative. */
16369 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
/* Memory destination: it must match the source element being kept
   (operands[1] for low, operands[2] for high -- hence the 1 + high index).  */
16371 if (MEM_P (operands
[0]))
16372 return rtx_equal_p (operands
[0], operands
[1 + high
]);
/* Two memory sources are only OK as a movddup (same address, SSE3).  */
16373 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
16374 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
/* NOTE(review): extraction gap -- the REG_P(input)/MEM_P branch structure
   around the "load low element" section and various else/braces were
   dropped; verify against upstream i386.c.  */
16378 /* Post-reload splitter for converting an SF or DFmode value in an
16379 SSE register into an unsigned SImode. */
16382 ix86_split_convert_uns_si_sse (rtx operands
[])
16384 enum machine_mode vecmode
;
16385 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
16387 large
= operands
[1];
16388 zero_or_two31
= operands
[2];
16389 input
= operands
[3];
16390 two31
= operands
[4];
16391 vecmode
= GET_MODE (large
);
16392 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
16394 /* Load up the value into the low element. We must ensure that the other
16395 elements are valid floats -- zero is the easiest such value. */
/* Memory-input path (TODO confirm: the guard was dropped).  */
16398 if (vecmode
== V4SFmode
)
16399 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
16401 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
/* Register-input path: zero VALUE then move the scalar in.  */
16405 input
= gen_rtx_REG (vecmode
, REGNO (input
));
16406 emit_move_insn (value
, CONST0_RTX (vecmode
));
16407 if (vecmode
== V4SFmode
)
16408 emit_insn (gen_sse_movss (value
, value
, input
));
16410 emit_insn (gen_sse2_movsd (value
, value
, input
));
/* Compute (value <= 2**31 ? 0 : 2**31) as a bitmask, subtract it, convert,
   then XOR the sign bit back in -- the classic unsigned-from-float trick.  */
16413 emit_move_insn (large
, two31
);
16414 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
16416 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
16417 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
16419 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
16420 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
16422 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
16423 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
/* Turn the compare mask into just the sign bit (shift left by 31).  */
16425 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
16426 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
16428 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
16429 if (vecmode
== V4SFmode
)
16430 emit_insn (gen_sse2_cvttps2dq (x
, value
));
16432 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
16435 emit_insn (gen_xorv4si3 (value
, value
, large
))
;
/* NOTE(review): extraction gap -- the declaration of `x', several braces
   and the TARGET_SSE3 else-branch structure around the final horizontal
   add were dropped; verify against upstream i386.c.  */
16438 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16439 Expects the 64-bit DImode to be supplied in a pair of integral
16440 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16441 -mfpmath=sse, !optimize_size only. */
16444 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
16446 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
16447 rtx int_xmm
, fp_xmm
;
16448 rtx biases
, exponents
;
/* Get the 64-bit input into the low half of an XMM register, by whichever
   route the target supports best.  */
16451 int_xmm
= gen_reg_rtx (V4SImode
);
16452 if (TARGET_INTER_UNIT_MOVES
)
16453 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
16454 else if (TARGET_SSE_SPLIT_REGS
)
16456 emit_clobber (int_xmm
);
16457 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
16461 x
= gen_reg_rtx (V2DImode
);
16462 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
16463 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
16466 x
= gen_rtx_CONST_VECTOR (V4SImode
,
16467 gen_rtvec (4, GEN_INT (0x43300000UL
),
16468 GEN_INT (0x45300000UL
),
16469 const0_rtx
, const0_rtx
));
16470 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
16472 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16473 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
16475 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16476 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16477 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16478 (0x1.0p84 + double(fp_value_hi_xmm)).
16479 Note these exponents differ by 32. */
16481 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
16483 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16484 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16485 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
16486 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
16487 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
16488 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
16489 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
16490 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
16491 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
16493 /* Add the upper and lower DFmode values together. */
/* SSE3 path: single horizontal add ...  */
16495 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
/* ... otherwise swap halves and add (the TARGET_SSE3 test was dropped).  */
16498 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
16499 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
16500 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
16503 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
/* Stub: exists only so the conversion patterns can be macroized uniformly;
   must never be reached at runtime.  */
16506 /* Not used, but eases macroization of patterns. */
16508 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
16509 rtx input ATTRIBUTE_UNUSED
)
16511 gcc_unreachable ();
/* NOTE(review): extraction gap -- declarations of `x' and `fp' and the
   final rtx_equal_p guard appear to have been dropped; verify against
   upstream i386.c.  */
16514 /* Convert an unsigned SImode value into a DFmode. Only currently used
16515 for SSE, but applicable anywhere. */
16518 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
16520 REAL_VALUE_TYPE TWO31r
;
/* Bias the input by INT_MIN (x - 2**31, computed as unsigned add of
   -2147483648), convert as signed, then add 2**31.0 back in DFmode.  */
16523 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
16524 NULL
, 1, OPTAB_DIRECT
);
16526 fp
= gen_reg_rtx (DFmode
);
16527 emit_insn (gen_floatsidf2 (fp
, x
));
16529 real_ldexp (&TWO31r
, &dconst1
, 31);
16530 x
= const_double_from_real_value (TWO31r
, DFmode
);
16532 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
16534 emit_move_insn (target
, x
);
/* NOTE(review): extraction gap -- the trailing arguments of the final
   expand_simple_binop call (gap 16557->16560) and the result guard were
   dropped; verify against upstream i386.c.  */
16537 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16538 32-bit mode; otherwise we have a direct convert instruction. */
16541 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
16543 REAL_VALUE_TYPE TWO32r
;
16544 rtx fp_lo
, fp_hi
, x
;
16546 fp_lo
= gen_reg_rtx (DFmode
);
16547 fp_hi
= gen_reg_rtx (DFmode
);
/* Convert the (signed) high word, scale it by 2**32 ...  */
16549 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
16551 real_ldexp (&TWO32r
, &dconst1
, 32);
16552 x
= const_double_from_real_value (TWO32r
, DFmode
);
16553 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
/* ... convert the low word as unsigned, and sum the two parts.  */
16555 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
16557 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
16560 emit_move_insn (target
, x
);
/* NOTE(review): extraction gap -- the trailing arguments of the two final
   expand_simple_binop calls (gaps 16581->16583, 16583->16585) were dropped;
   verify against upstream i386.c.  */
16563 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16564 For x86_32, -mfpmath=sse, !optimize_size only. */
16566 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
16568 REAL_VALUE_TYPE ONE16r
;
16569 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
/* Split the 32-bit input into two exact 16-bit halves, convert each to
   SFmode, then recombine as fp_hi * 2**16 + fp_lo -- each half converts
   exactly, avoiding the signed-conversion pitfall.  */
16571 real_ldexp (&ONE16r
, &dconst1
, 16);
16572 x
= const_double_from_real_value (ONE16r
, SFmode
);
16573 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
16574 NULL
, 0, OPTAB_DIRECT
);
16575 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
16576 NULL
, 0, OPTAB_DIRECT
);
16577 fp_hi
= gen_reg_rtx (SFmode
);
16578 fp_lo
= gen_reg_rtx (SFmode
);
16579 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
16580 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
16581 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
16583 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
16585 if (!rtx_equal_p (target
, fp_hi
))
16586 emit_move_insn (target
, fp_hi
);
/* NOTE(review): extraction gap -- the leading comment is cut mid-sentence,
   and the switch(mode)/case labels plus the `if (vect)' tests separating the
   replicated and single-element gen_rtvec calls were dropped; verify
   against upstream i386.c.  Each mode case builds either an all-VALUE
   vector (vect) or VALUE in element 0 with zeros elsewhere.  */
16589 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16590 then replicate the value for all elements of the vector
16594 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
/* V4SImode case.  */
16601 v
= gen_rtvec (4, value
, value
, value
, value
);
16602 return gen_rtx_CONST_VECTOR (V4SImode
, v
);
/* V2DImode case.  */
16606 v
= gen_rtvec (2, value
, value
);
16607 return gen_rtx_CONST_VECTOR (V2DImode
, v
);
/* V8SFmode case.  */
16611 v
= gen_rtvec (8, value
, value
, value
, value
,
16612 value
, value
, value
, value
);
16614 v
= gen_rtvec (8, value
, CONST0_RTX (SFmode
),
16615 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
),
16616 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
),
16617 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
16618 return gen_rtx_CONST_VECTOR (V8SFmode
, v
);
/* V4SFmode case.  */
16622 v
= gen_rtvec (4, value
, value
, value
, value
);
16624 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
16625 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
16626 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
/* V4DFmode case.  */
16630 v
= gen_rtvec (4, value
, value
, value
, value
);
16632 v
= gen_rtvec (4, value
, CONST0_RTX (DFmode
),
16633 CONST0_RTX (DFmode
), CONST0_RTX (DFmode
));
16634 return gen_rtx_CONST_VECTOR (V4DFmode
, v
);
/* V2DFmode case.  */
16638 v
= gen_rtvec (2, value
, value
);
16640 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
16641 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
/* Any other mode is a caller bug.  */
16644 gcc_unreachable ();
/* NOTE(review): heavily gapped extraction -- the switch(mode)/case labels,
   the `shift' computation, the vec_mode/imode assignments per mode, and
   several declarations (mask, v, vec) were dropped; the surviving lines
   below are the per-case lo/hi bit constructions.  Verify against
   upstream i386.c before relying on the structure.  */
16648 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16649 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16650 for an SSE register. If VECT is true, then replicate the mask for
16651 all elements of the vector register. If INVERT is true, then create
16652 a mask excluding the sign bit. */
16655 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
16657 enum machine_mode vec_mode
, imode
;
16658 HOST_WIDE_INT hi
, lo
;
16663 /* Find the sign bit, sign extended to 2*HWI. */
/* SF-family case: sign bit is bit 31.  */
16670 mode
= GET_MODE_INNER (mode
);
16672 lo
= 0x80000000, hi
= lo
< 0;
/* DF-family case: sign bit is bit 63, which may or may not fit in one
   HOST_WIDE_INT.  */
16679 mode
= GET_MODE_INNER (mode
);
16681 if (HOST_BITS_PER_WIDE_INT
>= 64)
16682 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
16684 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
/* TF-family case: no vector mode; build the 128-bit constant piecewise.  */
16689 vec_mode
= VOIDmode
;
16690 if (HOST_BITS_PER_WIDE_INT
>= 64)
16693 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
16700 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
16704 lo
= ~lo
, hi
= ~hi
;
16710 mask
= immed_double_const (lo
, hi
, imode
);
16712 vec
= gen_rtvec (2, v
, mask
);
16713 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
16714 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
16721 gcc_unreachable ();
/* INVERT flips the mask to select everything but the sign bit.  */
16725 lo
= ~lo
, hi
= ~hi
;
16727 /* Force this value into the low part of a fp vector constant. */
16728 mask
= immed_double_const (lo
, hi
, imode
);
16729 mask
= gen_lowpart (mode
, mask
);
16731 if (vec_mode
== VOIDmode
)
16732 return force_reg (mode
, mask
);
16734 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
16735 return force_reg (vec_mode
, v
);
/* NOTE(review): extraction gap -- the vmode assignments in each mode branch
   (16751/16753/16756/16758), dst/src setup, the use/par declarations and the
   use_sse branch structure were dropped; verify against upstream i386.c.  */
16738 /* Generate code for floating point ABS or NEG. */
16741 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
16744 rtx mask
, set
, dst
, src
;
16745 bool use_sse
= false;
16746 bool vector_mode
= VECTOR_MODE_P (mode
);
16747 enum machine_mode vmode
= mode
;
16751 else if (mode
== TFmode
)
16753 else if (TARGET_SSE_MATH
)
16755 use_sse
= SSE_FLOAT_MODE_P (mode
);
16756 if (mode
== SFmode
)
16758 else if (mode
== DFmode
)
16762 /* NEG and ABS performed with SSE use bitwise mask operations.
16763 Create the appropriate mask now. */
/* ABS masks the sign bit out (invert=true); NEG XORs it in.  */
16765 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
16772 set
= gen_rtx_fmt_e (code
, mode
, src
);
16773 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
/* Attach the mask as a USE so the splitter can find it; non-vector
   (scalar x87-capable) forms also clobber the flags.  */
16780 use
= gen_rtx_USE (VOIDmode
, mask
);
16782 par
= gen_rtvec (2, set
, use
);
16785 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16786 par
= gen_rtvec (3, set
, use
, clob
);
16788 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
/* NOTE(review): extraction gap -- op0/op1 initialization (gap 16802->16806),
   the vmode assignments per mode branch, and several braces/else keywords
   were dropped; verify against upstream i386.c.  */
16794 /* Expand a copysign operation. Special case operand 0 being a constant. */
16797 ix86_expand_copysign (rtx operands
[])
16799 enum machine_mode mode
, vmode
;
16800 rtx dest
, op0
, op1
, mask
, nmask
;
16802 dest
= operands
[0];
16806 mode
= GET_MODE (dest
);
16808 if (mode
== SFmode
)
16810 else if (mode
== DFmode
)
/* Constant-magnitude case: pre-fold ABS, widen to a vector constant,
   and emit the 4-operand *_const pattern.  */
16815 if (GET_CODE (op0
) == CONST_DOUBLE
)
16817 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
16819 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
16820 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
16822 if (mode
== SFmode
|| mode
== DFmode
)
16824 if (op0
== CONST0_RTX (mode
))
16825 op0
= CONST0_RTX (vmode
);
16828 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
16830 op0
= force_reg (vmode
, v
);
16833 else if (op0
!= CONST0_RTX (mode
))
16834 op0
= force_reg (mode
, op0
);
16836 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
16838 if (mode
== SFmode
)
16839 copysign_insn
= gen_copysignsf3_const
;
16840 else if (mode
== DFmode
)
16841 copysign_insn
= gen_copysigndf3_const
;
16843 copysign_insn
= gen_copysigntf3_const
;
16845 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
/* Variable-magnitude case: needs both the sign mask and its complement,
   and the 6-operand *_var pattern.  */
16849 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
16851 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
16852 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
16854 if (mode
== SFmode
)
16855 copysign_insn
= gen_copysignsf3_var
;
16856 else if (mode
== DFmode
)
16857 copysign_insn
= gen_copysigndf3_var
;
16859 copysign_insn
= gen_copysigntf3_var
;
16861 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
/* NOTE(review): extraction gap -- the op0 = operands[...] assignment
   (gap 16874->16876) and the closing braces were dropped; verify against
   upstream i386.c.  */
16865 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16866 be a constant, and so has already been expanded into a vector constant. */
16869 ix86_split_copysign_const (rtx operands
[])
16871 enum machine_mode mode
, vmode
;
16872 rtx dest
, op0
, mask
, x
;
16874 dest
= operands
[0];
16876 mask
= operands
[3];
16878 mode
= GET_MODE (dest
);
16879 vmode
= GET_MODE (mask
);
/* dest &= sign-mask (keeps only the sign bit of the sign source) ...  */
16881 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
16882 x
= gen_rtx_AND (vmode
, dest
, mask
);
16883 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* ... then OR in the (non-zero) constant magnitude.  */
16885 if (op0
!= CONST0_RTX (vmode
))
16887 x
= gen_rtx_IOR (vmode
, dest
, op0
);
16888 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
16892 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16893 so we have to do two masks. */
16896 ix86_split_copysign_var (rtx operands
[])
16898 enum machine_mode mode
, vmode
;
16899 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
16901 dest
= operands
[0];
16902 scratch
= operands
[1];
16905 nmask
= operands
[4];
16906 mask
= operands
[5];
16908 mode
= GET_MODE (dest
);
16909 vmode
= GET_MODE (mask
);
16911 if (rtx_equal_p (op0
, op1
))
16913 /* Shouldn't happen often (it's useless, obviously), but when it does
16914 we'd generate incorrect code if we continue below. */
16915 emit_move_insn (dest
, op0
);
16919 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
16921 gcc_assert (REGNO (op1
) == REGNO (scratch
));
16923 x
= gen_rtx_AND (vmode
, scratch
, mask
);
16924 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
16927 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
16928 x
= gen_rtx_NOT (vmode
, dest
);
16929 x
= gen_rtx_AND (vmode
, x
, op0
);
16930 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
16934 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
16936 x
= gen_rtx_AND (vmode
, scratch
, mask
);
16938 else /* alternative 2,4 */
16940 gcc_assert (REGNO (mask
) == REGNO (scratch
));
16941 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
16942 x
= gen_rtx_AND (vmode
, scratch
, op1
);
16944 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
16946 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
16948 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
16949 x
= gen_rtx_AND (vmode
, dest
, nmask
);
16951 else /* alternative 3,4 */
16953 gcc_assert (REGNO (nmask
) == REGNO (dest
));
16955 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
16956 x
= gen_rtx_AND (vmode
, dest
, op0
);
16958 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
16961 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
16962 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
16965 /* Return TRUE or FALSE depending on whether the first SET in INSN
16966 has source and destination with matching CC modes, and that the
16967 CC mode is at least as constrained as REQ_MODE. */
16970 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
16973 enum machine_mode set_mode
;
16975 set
= PATTERN (insn
);
16976 if (GET_CODE (set
) == PARALLEL
)
16977 set
= XVECEXP (set
, 0, 0);
16978 gcc_assert (GET_CODE (set
) == SET
);
16979 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
16981 set_mode
= GET_MODE (SET_DEST (set
));
16985 if (req_mode
!= CCNOmode
16986 && (req_mode
!= CCmode
16987 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
16991 if (req_mode
== CCGCmode
)
16995 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
16999 if (req_mode
== CCZmode
)
17010 gcc_unreachable ();
17013 return GET_MODE (SET_SRC (set
)) == set_mode
;
17016 /* Generate insn patterns to do an integer compare of OPERANDS. */
17019 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
17021 enum machine_mode cmpmode
;
17024 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
17025 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
17027 /* This is very simple, but making the interface the same as in the
17028 FP case makes the rest of the code easier. */
17029 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
17030 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
17032 /* Return the test that should be put into the flags user, i.e.
17033 the bcc, scc, or cmov instruction. */
17034 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
17037 /* Figure out whether to use ordered or unordered fp comparisons.
17038 Return the appropriate mode to use. */
17041 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
17043 /* ??? In order to make all comparisons reversible, we do all comparisons
17044 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17045 all forms trapping and nontrapping comparisons, we can make inequality
17046 comparisons trapping again, since it results in better code when using
17047 FCOM based compares. */
17048 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
17052 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
17054 enum machine_mode mode
= GET_MODE (op0
);
17056 if (SCALAR_FLOAT_MODE_P (mode
))
17058 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
17059 return ix86_fp_compare_mode (code
);
17064 /* Only zero flag is needed. */
17065 case EQ
: /* ZF=0 */
17066 case NE
: /* ZF!=0 */
17068 /* Codes needing carry flag. */
17069 case GEU
: /* CF=0 */
17070 case LTU
: /* CF=1 */
17071 /* Detect overflow checks. They need just the carry flag. */
17072 if (GET_CODE (op0
) == PLUS
17073 && rtx_equal_p (op1
, XEXP (op0
, 0)))
17077 case GTU
: /* CF=0 & ZF=0 */
17078 case LEU
: /* CF=1 | ZF=1 */
17079 /* Detect overflow checks. They need just the carry flag. */
17080 if (GET_CODE (op0
) == MINUS
17081 && rtx_equal_p (op1
, XEXP (op0
, 0)))
17085 /* Codes possibly doable only with sign flag when
17086 comparing against zero. */
17087 case GE
: /* SF=OF or SF=0 */
17088 case LT
: /* SF<>OF or SF=1 */
17089 if (op1
== const0_rtx
)
17092 /* For other cases Carry flag is not required. */
17094 /* Codes doable only with sign flag when comparing
17095 against zero, but we miss jump instruction for it
17096 so we need to use relational tests against overflow
17097 that thus needs to be zero. */
17098 case GT
: /* ZF=0 & SF=OF */
17099 case LE
: /* ZF=1 | SF<>OF */
17100 if (op1
== const0_rtx
)
17104 /* strcmp pattern do (use flags) and combine may ask us for proper
17109 gcc_unreachable ();
17113 /* Return the fixed registers used for condition codes. */
17116 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
17123 /* If two condition code modes are compatible, return a condition code
17124 mode which is compatible with both. Otherwise, return
17127 static enum machine_mode
17128 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
17133 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
17136 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
17137 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
17143 gcc_unreachable ();
17173 /* These are only compatible with themselves, which we already
17180 /* Return a comparison we can do and that it is equivalent to
17181 swap_condition (code) apart possibly from orderedness.
17182 But, never change orderedness if TARGET_IEEE_FP, returning
17183 UNKNOWN in that case if necessary. */
17185 static enum rtx_code
17186 ix86_fp_swap_condition (enum rtx_code code
)
17190 case GT
: /* GTU - CF=0 & ZF=0 */
17191 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
17192 case GE
: /* GEU - CF=0 */
17193 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
17194 case UNLT
: /* LTU - CF=1 */
17195 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
17196 case UNLE
: /* LEU - CF=1 | ZF=1 */
17197 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
17199 return swap_condition (code
);
17203 /* Return cost of comparison CODE using the best strategy for performance.
17204 All following functions do use number of instructions as a cost metrics.
17205 In future this should be tweaked to compute bytes for optimize_size and
17206 take into account performance of various instructions on various CPUs. */
17209 ix86_fp_comparison_cost (enum rtx_code code
)
17213 /* The cost of code using bit-twiddling on %ah. */
17230 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
17234 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
17237 gcc_unreachable ();
17240 switch (ix86_fp_comparison_strategy (code
))
17242 case IX86_FPCMP_COMI
:
17243 return arith_cost
> 4 ? 3 : 2;
17244 case IX86_FPCMP_SAHF
:
17245 return arith_cost
> 4 ? 4 : 3;
17251 /* Return strategy to use for floating-point. We assume that fcomi is always
17252 preferrable where available, since that is also true when looking at size
17253 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17255 enum ix86_fpcmp_strategy
17256 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
17258 /* Do fcomi/sahf based test when profitable. */
17261 return IX86_FPCMP_COMI
;
17263 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_function_for_size_p (cfun
)))
17264 return IX86_FPCMP_SAHF
;
17266 return IX86_FPCMP_ARITH
;
17269 /* Swap, force into registers, or otherwise massage the two operands
17270 to a fp comparison. The operands are updated in place; the new
17271 comparison code is returned. */
17273 static enum rtx_code
17274 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
17276 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
17277 rtx op0
= *pop0
, op1
= *pop1
;
17278 enum machine_mode op_mode
= GET_MODE (op0
);
17279 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
17281 /* All of the unordered compare instructions only work on registers.
17282 The same is true of the fcomi compare instructions. The XFmode
17283 compare instructions require registers except when comparing
17284 against zero or when converting operand 1 from fixed point to
17288 && (fpcmp_mode
== CCFPUmode
17289 || (op_mode
== XFmode
17290 && ! (standard_80387_constant_p (op0
) == 1
17291 || standard_80387_constant_p (op1
) == 1)
17292 && GET_CODE (op1
) != FLOAT
)
17293 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
17295 op0
= force_reg (op_mode
, op0
);
17296 op1
= force_reg (op_mode
, op1
);
17300 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17301 things around if they appear profitable, otherwise force op0
17302 into a register. */
17304 if (standard_80387_constant_p (op0
) == 0
17306 && ! (standard_80387_constant_p (op1
) == 0
17309 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
17310 if (new_code
!= UNKNOWN
)
17313 tmp
= op0
, op0
= op1
, op1
= tmp
;
17319 op0
= force_reg (op_mode
, op0
);
17321 if (CONSTANT_P (op1
))
17323 int tmp
= standard_80387_constant_p (op1
);
17325 op1
= validize_mem (force_const_mem (op_mode
, op1
));
17329 op1
= force_reg (op_mode
, op1
);
17332 op1
= force_reg (op_mode
, op1
);
17336 /* Try to rearrange the comparison to make it cheaper. */
17337 if (ix86_fp_comparison_cost (code
)
17338 > ix86_fp_comparison_cost (swap_condition (code
))
17339 && (REG_P (op1
) || can_create_pseudo_p ()))
17342 tmp
= op0
, op0
= op1
, op1
= tmp
;
17343 code
= swap_condition (code
);
17345 op0
= force_reg (op_mode
, op0
);
17353 /* Convert comparison codes we use to represent FP comparison to integer
17354 code that will result in proper branch. Return UNKNOWN if no such code
17358 ix86_fp_compare_code_to_integer (enum rtx_code code
)
17387 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17390 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
17392 enum machine_mode fpcmp_mode
, intcmp_mode
;
17395 fpcmp_mode
= ix86_fp_compare_mode (code
);
17396 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
17398 /* Do fcomi/sahf based test when profitable. */
17399 switch (ix86_fp_comparison_strategy (code
))
17401 case IX86_FPCMP_COMI
:
17402 intcmp_mode
= fpcmp_mode
;
17403 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
17404 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
17409 case IX86_FPCMP_SAHF
:
17410 intcmp_mode
= fpcmp_mode
;
17411 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
17412 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
17416 scratch
= gen_reg_rtx (HImode
);
17417 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
17418 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
17421 case IX86_FPCMP_ARITH
:
17422 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17423 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
17424 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
17426 scratch
= gen_reg_rtx (HImode
);
17427 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
17429 /* In the unordered case, we have to check C2 for NaN's, which
17430 doesn't happen to work out to anything nice combination-wise.
17431 So do some bit twiddling on the value we've got in AH to come
17432 up with an appropriate set of condition codes. */
17434 intcmp_mode
= CCNOmode
;
17439 if (code
== GT
|| !TARGET_IEEE_FP
)
17441 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
17446 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
17447 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
17448 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
17449 intcmp_mode
= CCmode
;
17455 if (code
== LT
&& TARGET_IEEE_FP
)
17457 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
17458 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
17459 intcmp_mode
= CCmode
;
17464 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
17470 if (code
== GE
|| !TARGET_IEEE_FP
)
17472 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
17477 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
17478 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
17484 if (code
== LE
&& TARGET_IEEE_FP
)
17486 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
17487 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
17488 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
17489 intcmp_mode
= CCmode
;
17494 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
17500 if (code
== EQ
&& TARGET_IEEE_FP
)
17502 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
17503 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
17504 intcmp_mode
= CCmode
;
17509 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
17515 if (code
== NE
&& TARGET_IEEE_FP
)
17517 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
17518 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
17524 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
17530 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
17534 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
17539 gcc_unreachable ();
17547 /* Return the test that should be put into the flags user, i.e.
17548 the bcc, scc, or cmov instruction. */
17549 return gen_rtx_fmt_ee (code
, VOIDmode
,
17550 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
17555 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
17559 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
17560 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
17562 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
17564 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
17565 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
17568 ret
= ix86_expand_int_compare (code
, op0
, op1
);
17574 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
17576 enum machine_mode mode
= GET_MODE (op0
);
17588 tmp
= ix86_expand_compare (code
, op0
, op1
);
17589 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
17590 gen_rtx_LABEL_REF (VOIDmode
, label
),
17592 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
17599 /* Expand DImode branch into multiple compare+branch. */
17601 rtx lo
[2], hi
[2], label2
;
17602 enum rtx_code code1
, code2
, code3
;
17603 enum machine_mode submode
;
17605 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
17607 tmp
= op0
, op0
= op1
, op1
= tmp
;
17608 code
= swap_condition (code
);
17611 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
17612 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
17614 submode
= mode
== DImode
? SImode
: DImode
;
17616 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17617 avoid two branches. This costs one extra insn, so disable when
17618 optimizing for size. */
17620 if ((code
== EQ
|| code
== NE
)
17621 && (!optimize_insn_for_size_p ()
17622 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
17627 if (hi
[1] != const0_rtx
)
17628 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
17629 NULL_RTX
, 0, OPTAB_WIDEN
);
17632 if (lo
[1] != const0_rtx
)
17633 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
17634 NULL_RTX
, 0, OPTAB_WIDEN
);
17636 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
17637 NULL_RTX
, 0, OPTAB_WIDEN
);
17639 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
17643 /* Otherwise, if we are doing less-than or greater-or-equal-than,
17644 op1 is a constant and the low word is zero, then we can just
17645 examine the high word. Similarly for low word -1 and
17646 less-or-equal-than or greater-than. */
17648 if (CONST_INT_P (hi
[1]))
17651 case LT
: case LTU
: case GE
: case GEU
:
17652 if (lo
[1] == const0_rtx
)
17654 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
17658 case LE
: case LEU
: case GT
: case GTU
:
17659 if (lo
[1] == constm1_rtx
)
17661 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
17669 /* Otherwise, we need two or three jumps. */
17671 label2
= gen_label_rtx ();
17674 code2
= swap_condition (code
);
17675 code3
= unsigned_condition (code
);
17679 case LT
: case GT
: case LTU
: case GTU
:
17682 case LE
: code1
= LT
; code2
= GT
; break;
17683 case GE
: code1
= GT
; code2
= LT
; break;
17684 case LEU
: code1
= LTU
; code2
= GTU
; break;
17685 case GEU
: code1
= GTU
; code2
= LTU
; break;
17687 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
17688 case NE
: code2
= UNKNOWN
; break;
17691 gcc_unreachable ();
17696 * if (hi(a) < hi(b)) goto true;
17697 * if (hi(a) > hi(b)) goto false;
17698 * if (lo(a) < lo(b)) goto true;
17702 if (code1
!= UNKNOWN
)
17703 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
17704 if (code2
!= UNKNOWN
)
17705 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
17707 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
17709 if (code2
!= UNKNOWN
)
17710 emit_label (label2
);
17715 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
17720 /* Split branch based on floating point condition. */
17722 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
17723 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
17728 if (target2
!= pc_rtx
)
17731 code
= reverse_condition_maybe_unordered (code
);
17736 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
17739 /* Remove pushed operand from stack. */
17741 ix86_free_from_memory (GET_MODE (pushed
));
17743 i
= emit_jump_insn (gen_rtx_SET
17745 gen_rtx_IF_THEN_ELSE (VOIDmode
,
17746 condition
, target1
, target2
)));
17747 if (split_branch_probability
>= 0)
17748 add_reg_note (i
, REG_BR_PROB
, GEN_INT (split_branch_probability
));
17752 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
17756 gcc_assert (GET_MODE (dest
) == QImode
);
17758 ret
= ix86_expand_compare (code
, op0
, op1
);
17759 PUT_MODE (ret
, QImode
);
17760 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
17763 /* Expand comparison setting or clearing carry flag. Return true when
17764 successful and set pop for the operation. */
17766 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
17768 enum machine_mode mode
=
17769 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
17771 /* Do not handle double-mode compares that go through special path. */
17772 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
17775 if (SCALAR_FLOAT_MODE_P (mode
))
17777 rtx compare_op
, compare_seq
;
17779 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
17781 /* Shortcut: following common codes never translate
17782 into carry flag compares. */
17783 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
17784 || code
== ORDERED
|| code
== UNORDERED
)
17787 /* These comparisons require zero flag; swap operands so they won't. */
17788 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
17789 && !TARGET_IEEE_FP
)
17794 code
= swap_condition (code
);
17797 /* Try to expand the comparison and verify that we end up with
17798 carry flag based comparison. This fails to be true only when
17799 we decide to expand comparison using arithmetic that is not
17800 too common scenario. */
17802 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
17803 compare_seq
= get_insns ();
17806 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
17807 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
17808 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
17810 code
= GET_CODE (compare_op
);
17812 if (code
!= LTU
&& code
!= GEU
)
17815 emit_insn (compare_seq
);
17820 if (!INTEGRAL_MODE_P (mode
))
17829 /* Convert a==0 into (unsigned)a<1. */
17832 if (op1
!= const0_rtx
)
17835 code
= (code
== EQ
? LTU
: GEU
);
17838 /* Convert a>b into b<a or a>=b-1. */
17841 if (CONST_INT_P (op1
))
17843 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
17844 /* Bail out on overflow. We still can swap operands but that
17845 would force loading of the constant into register. */
17846 if (op1
== const0_rtx
17847 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
17849 code
= (code
== GTU
? GEU
: LTU
);
17856 code
= (code
== GTU
? LTU
: GEU
);
17860 /* Convert a>=0 into (unsigned)a<0x80000000. */
17863 if (mode
== DImode
|| op1
!= const0_rtx
)
17865 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
17866 code
= (code
== LT
? GEU
: LTU
);
17870 if (mode
== DImode
|| op1
!= constm1_rtx
)
17872 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
17873 code
= (code
== LE
? GEU
: LTU
);
17879 /* Swapping operands may cause constant to appear as first operand. */
17880 if (!nonimmediate_operand (op0
, VOIDmode
))
17882 if (!can_create_pseudo_p ())
17884 op0
= force_reg (mode
, op0
);
17886 *pop
= ix86_expand_compare (code
, op0
, op1
);
17887 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
17892 ix86_expand_int_movcc (rtx operands
[])
17894 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
17895 rtx compare_seq
, compare_op
;
17896 enum machine_mode mode
= GET_MODE (operands
[0]);
17897 bool sign_bit_compare_p
= false;
17898 rtx op0
= XEXP (operands
[1], 0);
17899 rtx op1
= XEXP (operands
[1], 1);
17902 compare_op
= ix86_expand_compare (code
, op0
, op1
);
17903 compare_seq
= get_insns ();
17906 compare_code
= GET_CODE (compare_op
);
17908 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
17909 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
17910 sign_bit_compare_p
= true;
17912 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17913 HImode insns, we'd be swallowed in word prefix ops. */
17915 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
17916 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
17917 && CONST_INT_P (operands
[2])
17918 && CONST_INT_P (operands
[3]))
17920 rtx out
= operands
[0];
17921 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
17922 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
17923 HOST_WIDE_INT diff
;
17926 /* Sign bit compares are better done using shifts than we do by using
17928 if (sign_bit_compare_p
17929 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
17931 /* Detect overlap between destination and compare sources. */
17934 if (!sign_bit_compare_p
)
17937 bool fpcmp
= false;
17939 compare_code
= GET_CODE (compare_op
);
17941 flags
= XEXP (compare_op
, 0);
17943 if (GET_MODE (flags
) == CCFPmode
17944 || GET_MODE (flags
) == CCFPUmode
)
17948 = ix86_fp_compare_code_to_integer (compare_code
);
17951 /* To simplify rest of code, restrict to the GEU case. */
17952 if (compare_code
== LTU
)
17954 HOST_WIDE_INT tmp
= ct
;
17957 compare_code
= reverse_condition (compare_code
);
17958 code
= reverse_condition (code
);
17963 PUT_CODE (compare_op
,
17964 reverse_condition_maybe_unordered
17965 (GET_CODE (compare_op
)));
17967 PUT_CODE (compare_op
,
17968 reverse_condition (GET_CODE (compare_op
)));
17972 if (reg_overlap_mentioned_p (out
, op0
)
17973 || reg_overlap_mentioned_p (out
, op1
))
17974 tmp
= gen_reg_rtx (mode
);
17976 if (mode
== DImode
)
17977 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
17979 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
17980 flags
, compare_op
));
17984 if (code
== GT
|| code
== GE
)
17985 code
= reverse_condition (code
);
17988 HOST_WIDE_INT tmp
= ct
;
17993 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
18006 tmp
= expand_simple_binop (mode
, PLUS
,
18008 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18019 tmp
= expand_simple_binop (mode
, IOR
,
18021 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18023 else if (diff
== -1 && ct
)
18033 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
18035 tmp
= expand_simple_binop (mode
, PLUS
,
18036 copy_rtx (tmp
), GEN_INT (cf
),
18037 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18045 * andl cf - ct, dest
18055 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
18058 tmp
= expand_simple_binop (mode
, AND
,
18060 gen_int_mode (cf
- ct
, mode
),
18061 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18063 tmp
= expand_simple_binop (mode
, PLUS
,
18064 copy_rtx (tmp
), GEN_INT (ct
),
18065 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18068 if (!rtx_equal_p (tmp
, out
))
18069 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
18076 enum machine_mode cmp_mode
= GET_MODE (op0
);
18079 tmp
= ct
, ct
= cf
, cf
= tmp
;
18082 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
18084 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
18086 /* We may be reversing unordered compare to normal compare, that
18087 is not valid in general (we may convert non-trapping condition
18088 to trapping one), however on i386 we currently emit all
18089 comparisons unordered. */
18090 compare_code
= reverse_condition_maybe_unordered (compare_code
);
18091 code
= reverse_condition_maybe_unordered (code
);
18095 compare_code
= reverse_condition (compare_code
);
18096 code
= reverse_condition (code
);
18100 compare_code
= UNKNOWN
;
18101 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
18102 && CONST_INT_P (op1
))
18104 if (op1
== const0_rtx
18105 && (code
== LT
|| code
== GE
))
18106 compare_code
= code
;
18107 else if (op1
== constm1_rtx
)
18111 else if (code
== GT
)
18116 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18117 if (compare_code
!= UNKNOWN
18118 && GET_MODE (op0
) == GET_MODE (out
)
18119 && (cf
== -1 || ct
== -1))
18121 /* If lea code below could be used, only optimize
18122 if it results in a 2 insn sequence. */
18124 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
18125 || diff
== 3 || diff
== 5 || diff
== 9)
18126 || (compare_code
== LT
&& ct
== -1)
18127 || (compare_code
== GE
&& cf
== -1))
18130 * notl op1 (if necessary)
18138 code
= reverse_condition (code
);
18141 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
18143 out
= expand_simple_binop (mode
, IOR
,
18145 out
, 1, OPTAB_DIRECT
);
18146 if (out
!= operands
[0])
18147 emit_move_insn (operands
[0], out
);
18154 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
18155 || diff
== 3 || diff
== 5 || diff
== 9)
18156 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
18158 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
18164 * lea cf(dest*(ct-cf)),dest
18168 * This also catches the degenerate setcc-only case.
18174 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
18177 /* On x86_64 the lea instruction operates on Pmode, so we need
18178 to get arithmetics done in proper mode to match. */
18180 tmp
= copy_rtx (out
);
18184 out1
= copy_rtx (out
);
18185 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
18189 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
18195 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
18198 if (!rtx_equal_p (tmp
, out
))
18201 out
= force_operand (tmp
, copy_rtx (out
));
18203 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
18205 if (!rtx_equal_p (out
, operands
[0]))
18206 emit_move_insn (operands
[0], copy_rtx (out
));
18212 * General case: Jumpful:
18213 * xorl dest,dest cmpl op1, op2
18214 * cmpl op1, op2 movl ct, dest
18215 * setcc dest jcc 1f
18216 * decl dest movl cf, dest
18217 * andl (cf-ct),dest 1:
18220 * Size 20. Size 14.
18222 * This is reasonably steep, but branch mispredict costs are
18223 * high on modern cpus, so consider failing only if optimizing
18227 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
18228 && BRANCH_COST (optimize_insn_for_speed_p (),
18233 enum machine_mode cmp_mode
= GET_MODE (op0
);
18238 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
18240 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
18242 /* We may be reversing unordered compare to normal compare,
18243 that is not valid in general (we may convert non-trapping
18244 condition to trapping one), however on i386 we currently
18245 emit all comparisons unordered. */
18246 code
= reverse_condition_maybe_unordered (code
);
18250 code
= reverse_condition (code
);
18251 if (compare_code
!= UNKNOWN
)
18252 compare_code
= reverse_condition (compare_code
);
18256 if (compare_code
!= UNKNOWN
)
18258 /* notl op1 (if needed)
18263 For x < 0 (resp. x <= -1) there will be no notl,
18264 so if possible swap the constants to get rid of the
18266 True/false will be -1/0 while code below (store flag
18267 followed by decrement) is 0/-1, so the constants need
18268 to be exchanged once more. */
18270 if (compare_code
== GE
|| !cf
)
18272 code
= reverse_condition (code
);
18277 HOST_WIDE_INT tmp
= cf
;
18282 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
18286 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
18288 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
18290 copy_rtx (out
), 1, OPTAB_DIRECT
);
18293 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
18294 gen_int_mode (cf
- ct
, mode
),
18295 copy_rtx (out
), 1, OPTAB_DIRECT
);
18297 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
18298 copy_rtx (out
), 1, OPTAB_DIRECT
);
18299 if (!rtx_equal_p (out
, operands
[0]))
18300 emit_move_insn (operands
[0], copy_rtx (out
));
18306 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
18308 /* Try a few things more with specific constants and a variable. */
18311 rtx var
, orig_out
, out
, tmp
;
18313 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18316 /* If one of the two operands is an interesting constant, load a
18317 constant with the above and mask it in with a logical operation. */
18319 if (CONST_INT_P (operands
[2]))
18322 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
18323 operands
[3] = constm1_rtx
, op
= and_optab
;
18324 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
18325 operands
[3] = const0_rtx
, op
= ior_optab
;
18329 else if (CONST_INT_P (operands
[3]))
18332 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
18333 operands
[2] = constm1_rtx
, op
= and_optab
;
18334 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
18335 operands
[2] = const0_rtx
, op
= ior_optab
;
18342 orig_out
= operands
[0];
18343 tmp
= gen_reg_rtx (mode
);
18346 /* Recurse to get the constant loaded. */
18347 if (ix86_expand_int_movcc (operands
) == 0)
18350 /* Mask in the interesting variable. */
18351 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
18353 if (!rtx_equal_p (out
, orig_out
))
18354 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
18360 * For comparison with above,
18370 if (! nonimmediate_operand (operands
[2], mode
))
18371 operands
[2] = force_reg (mode
, operands
[2]);
18372 if (! nonimmediate_operand (operands
[3], mode
))
18373 operands
[3] = force_reg (mode
, operands
[3]);
18375 if (! register_operand (operands
[2], VOIDmode
)
18377 || ! register_operand (operands
[3], VOIDmode
)))
18378 operands
[2] = force_reg (mode
, operands
[2]);
18381 && ! register_operand (operands
[3], VOIDmode
))
18382 operands
[3] = force_reg (mode
, operands
[3]);
18384 emit_insn (compare_seq
);
18385 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
18386 gen_rtx_IF_THEN_ELSE (mode
,
18387 compare_op
, operands
[2],
18392 /* Swap, force into registers, or otherwise massage the two operands
18393 to an sse comparison with a mask result. Thus we differ a bit from
18394 ix86_prepare_fp_compare_args which expects to produce a flags result.
18396 The DEST operand exists to help determine whether to commute commutative
18397 operators. The POP0/POP1 operands are updated in place. The new
18398 comparison code is returned, or UNKNOWN if not implementable. */
18400 static enum rtx_code
18401 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
18402 rtx
*pop0
, rtx
*pop1
)
18410 /* We have no LTGT as an operator. We could implement it with
18411 NE & ORDERED, but this requires an extra temporary. It's
18412 not clear that it's worth it. */
18419 /* These are supported directly. */
18426 /* For commutative operators, try to canonicalize the destination
18427 operand to be first in the comparison - this helps reload to
18428 avoid extra moves. */
18429 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
18437 /* These are not supported directly. Swap the comparison operands
18438 to transform into something that is supported. */
18442 code
= swap_condition (code
);
18446 gcc_unreachable ();
18452 /* Detect conditional moves that exactly match min/max operational
18453 semantics. Note that this is IEEE safe, as long as we don't
18454 interchange the operands.
18456 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18457 and TRUE if the operation is successful and instructions are emitted. */
18460 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
18461 rtx cmp_op1
, rtx if_true
, rtx if_false
)
18463 enum machine_mode mode
;
18469 else if (code
== UNGE
)
18472 if_true
= if_false
;
18478 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
18480 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
18485 mode
= GET_MODE (dest
);
18487 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18488 but MODE may be a vector mode and thus not appropriate. */
18489 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
18491 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
18494 if_true
= force_reg (mode
, if_true
);
18495 v
= gen_rtvec (2, if_true
, if_false
);
18496 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
18500 code
= is_min
? SMIN
: SMAX
;
18501 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
18504 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
18508 /* Expand an sse vector comparison. Return the register with the result. */
18511 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
18512 rtx op_true
, rtx op_false
)
18514 enum machine_mode mode
= GET_MODE (dest
);
18517 cmp_op0
= force_reg (mode
, cmp_op0
);
18518 if (!nonimmediate_operand (cmp_op1
, mode
))
18519 cmp_op1
= force_reg (mode
, cmp_op1
);
18522 || reg_overlap_mentioned_p (dest
, op_true
)
18523 || reg_overlap_mentioned_p (dest
, op_false
))
18524 dest
= gen_reg_rtx (mode
);
18526 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
18527 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18532 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18533 operations. This is used for both scalar and vector conditional moves. */
18536 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
18538 enum machine_mode mode
= GET_MODE (dest
);
18541 if (op_false
== CONST0_RTX (mode
))
18543 op_true
= force_reg (mode
, op_true
);
18544 x
= gen_rtx_AND (mode
, cmp
, op_true
);
18545 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18547 else if (op_true
== CONST0_RTX (mode
))
18549 op_false
= force_reg (mode
, op_false
);
18550 x
= gen_rtx_NOT (mode
, cmp
);
18551 x
= gen_rtx_AND (mode
, x
, op_false
);
18552 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18554 else if (TARGET_XOP
)
18556 rtx pcmov
= gen_rtx_SET (mode
, dest
,
18557 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
18564 op_true
= force_reg (mode
, op_true
);
18565 op_false
= force_reg (mode
, op_false
);
18567 t2
= gen_reg_rtx (mode
);
18569 t3
= gen_reg_rtx (mode
);
18573 x
= gen_rtx_AND (mode
, op_true
, cmp
);
18574 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
18576 x
= gen_rtx_NOT (mode
, cmp
);
18577 x
= gen_rtx_AND (mode
, x
, op_false
);
18578 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
18580 x
= gen_rtx_IOR (mode
, t3
, t2
);
18581 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18585 /* Expand a floating-point conditional move. Return true if successful. */
18588 ix86_expand_fp_movcc (rtx operands
[])
18590 enum machine_mode mode
= GET_MODE (operands
[0]);
18591 enum rtx_code code
= GET_CODE (operands
[1]);
18592 rtx tmp
, compare_op
;
18593 rtx op0
= XEXP (operands
[1], 0);
18594 rtx op1
= XEXP (operands
[1], 1);
18596 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
18598 enum machine_mode cmode
;
18600 /* Since we've no cmove for sse registers, don't force bad register
18601 allocation just to gain access to it. Deny movcc when the
18602 comparison mode doesn't match the move mode. */
18603 cmode
= GET_MODE (op0
);
18604 if (cmode
== VOIDmode
)
18605 cmode
= GET_MODE (op1
);
18609 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
18610 if (code
== UNKNOWN
)
18613 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
18614 operands
[2], operands
[3]))
18617 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
18618 operands
[2], operands
[3]);
18619 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
18623 /* The floating point conditional move instructions don't directly
18624 support conditions resulting from a signed integer comparison. */
18626 compare_op
= ix86_expand_compare (code
, op0
, op1
);
18627 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
18629 tmp
= gen_reg_rtx (QImode
);
18630 ix86_expand_setcc (tmp
, code
, op0
, op1
);
18632 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
18635 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
18636 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
18637 operands
[2], operands
[3])));
18642 /* Expand a floating-point vector conditional move; a vcond operation
18643 rather than a movcc operation. */
18646 ix86_expand_fp_vcond (rtx operands
[])
18648 enum rtx_code code
= GET_CODE (operands
[3]);
18651 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
18652 &operands
[4], &operands
[5]);
18653 if (code
== UNKNOWN
)
18656 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
18657 operands
[5], operands
[1], operands
[2]))
18660 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
18661 operands
[1], operands
[2]);
18662 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
18666 /* Expand a signed/unsigned integral vector conditional move. */
18669 ix86_expand_int_vcond (rtx operands
[])
18671 enum machine_mode mode
= GET_MODE (operands
[0]);
18672 enum rtx_code code
= GET_CODE (operands
[3]);
18673 bool negate
= false;
18676 cop0
= operands
[4];
18677 cop1
= operands
[5];
18679 /* XOP supports all of the comparisons on all vector int types. */
18682 /* Canonicalize the comparison to EQ, GT, GTU. */
18693 code
= reverse_condition (code
);
18699 code
= reverse_condition (code
);
18705 code
= swap_condition (code
);
18706 x
= cop0
, cop0
= cop1
, cop1
= x
;
18710 gcc_unreachable ();
18713 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18714 if (mode
== V2DImode
)
18719 /* SSE4.1 supports EQ. */
18720 if (!TARGET_SSE4_1
)
18726 /* SSE4.2 supports GT/GTU. */
18727 if (!TARGET_SSE4_2
)
18732 gcc_unreachable ();
18736 /* Unsigned parallel compare is not supported by the hardware.
18737 Play some tricks to turn this into a signed comparison
18741 cop0
= force_reg (mode
, cop0
);
18749 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
18751 /* Subtract (-(INT MAX) - 1) from both operands to make
18753 mask
= ix86_build_signbit_mask (mode
, true, false);
18754 gen_sub3
= (mode
== V4SImode
18755 ? gen_subv4si3
: gen_subv2di3
);
18756 t1
= gen_reg_rtx (mode
);
18757 emit_insn (gen_sub3 (t1
, cop0
, mask
));
18759 t2
= gen_reg_rtx (mode
);
18760 emit_insn (gen_sub3 (t2
, cop1
, mask
));
18770 /* Perform a parallel unsigned saturating subtraction. */
18771 x
= gen_reg_rtx (mode
);
18772 emit_insn (gen_rtx_SET (VOIDmode
, x
,
18773 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
18776 cop1
= CONST0_RTX (mode
);
18782 gcc_unreachable ();
18787 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
18788 operands
[1+negate
], operands
[2-negate
]);
18790 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
18791 operands
[2-negate
]);
18795 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18796 true if we should do zero extension, else sign extension. HIGH_P is
18797 true if we want the N/2 high elements, else the low elements. */
18800 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
18802 enum machine_mode imode
= GET_MODE (operands
[1]);
18803 rtx (*unpack
)(rtx
, rtx
, rtx
);
18810 unpack
= gen_vec_interleave_highv16qi
;
18812 unpack
= gen_vec_interleave_lowv16qi
;
18816 unpack
= gen_vec_interleave_highv8hi
;
18818 unpack
= gen_vec_interleave_lowv8hi
;
18822 unpack
= gen_vec_interleave_highv4si
;
18824 unpack
= gen_vec_interleave_lowv4si
;
18827 gcc_unreachable ();
18830 dest
= gen_lowpart (imode
, operands
[0]);
18833 se
= force_reg (imode
, CONST0_RTX (imode
));
18835 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
18836 operands
[1], pc_rtx
, pc_rtx
);
18838 emit_insn (unpack (dest
, operands
[1], se
));
18841 /* This function performs the same task as ix86_expand_sse_unpack,
18842 but with SSE4.1 instructions. */
18845 ix86_expand_sse4_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
18847 enum machine_mode imode
= GET_MODE (operands
[1]);
18848 rtx (*unpack
)(rtx
, rtx
);
18855 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
18857 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
18861 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
18863 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
18867 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
18869 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
18872 gcc_unreachable ();
18875 dest
= operands
[0];
18878 /* Shift higher 8 bytes to lower 8 bytes. */
18879 src
= gen_reg_rtx (imode
);
18880 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, src
),
18881 gen_lowpart (V1TImode
, operands
[1]),
18887 emit_insn (unpack (dest
, src
));
18890 /* Expand conditional increment or decrement using adb/sbb instructions.
18891 The default case using setcc followed by the conditional move can be
18892 done by generic code. */
18894 ix86_expand_int_addcc (rtx operands
[])
18896 enum rtx_code code
= GET_CODE (operands
[1]);
18898 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
18900 rtx val
= const0_rtx
;
18901 bool fpcmp
= false;
18902 enum machine_mode mode
;
18903 rtx op0
= XEXP (operands
[1], 0);
18904 rtx op1
= XEXP (operands
[1], 1);
18906 if (operands
[3] != const1_rtx
18907 && operands
[3] != constm1_rtx
)
18909 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
18911 code
= GET_CODE (compare_op
);
18913 flags
= XEXP (compare_op
, 0);
18915 if (GET_MODE (flags
) == CCFPmode
18916 || GET_MODE (flags
) == CCFPUmode
)
18919 code
= ix86_fp_compare_code_to_integer (code
);
18926 PUT_CODE (compare_op
,
18927 reverse_condition_maybe_unordered
18928 (GET_CODE (compare_op
)));
18930 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
18933 mode
= GET_MODE (operands
[0]);
18935 /* Construct either adc or sbb insn. */
18936 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
18941 insn
= gen_subqi3_carry
;
18944 insn
= gen_subhi3_carry
;
18947 insn
= gen_subsi3_carry
;
18950 insn
= gen_subdi3_carry
;
18953 gcc_unreachable ();
18961 insn
= gen_addqi3_carry
;
18964 insn
= gen_addhi3_carry
;
18967 insn
= gen_addsi3_carry
;
18970 insn
= gen_adddi3_carry
;
18973 gcc_unreachable ();
18976 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
18982 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
18983 but works for floating pointer parameters and nonoffsetable memories.
18984 For pushes, it returns just stack offsets; the values will be saved
18985 in the right order. Maximally three parts are generated. */
18988 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
18993 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
18995 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
18997 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
18998 gcc_assert (size
>= 2 && size
<= 4);
19000 /* Optimize constant pool reference to immediates. This is used by fp
19001 moves, that force all constants to memory to allow combining. */
19002 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
19004 rtx tmp
= maybe_get_pool_constant (operand
);
19009 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
19011 /* The only non-offsetable memories we handle are pushes. */
19012 int ok
= push_operand (operand
, VOIDmode
);
19016 operand
= copy_rtx (operand
);
19017 PUT_MODE (operand
, Pmode
);
19018 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
19022 if (GET_CODE (operand
) == CONST_VECTOR
)
19024 enum machine_mode imode
= int_mode_for_mode (mode
);
19025 /* Caution: if we looked through a constant pool memory above,
19026 the operand may actually have a different mode now. That's
19027 ok, since we want to pun this all the way back to an integer. */
19028 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
19029 gcc_assert (operand
!= NULL
);
19035 if (mode
== DImode
)
19036 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
19041 if (REG_P (operand
))
19043 gcc_assert (reload_completed
);
19044 for (i
= 0; i
< size
; i
++)
19045 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
19047 else if (offsettable_memref_p (operand
))
19049 operand
= adjust_address (operand
, SImode
, 0);
19050 parts
[0] = operand
;
19051 for (i
= 1; i
< size
; i
++)
19052 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
19054 else if (GET_CODE (operand
) == CONST_DOUBLE
)
19059 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
19063 real_to_target (l
, &r
, mode
);
19064 parts
[3] = gen_int_mode (l
[3], SImode
);
19065 parts
[2] = gen_int_mode (l
[2], SImode
);
19068 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
19069 parts
[2] = gen_int_mode (l
[2], SImode
);
19072 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
19075 gcc_unreachable ();
19077 parts
[1] = gen_int_mode (l
[1], SImode
);
19078 parts
[0] = gen_int_mode (l
[0], SImode
);
19081 gcc_unreachable ();
19086 if (mode
== TImode
)
19087 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
19088 if (mode
== XFmode
|| mode
== TFmode
)
19090 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
19091 if (REG_P (operand
))
19093 gcc_assert (reload_completed
);
19094 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
19095 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
19097 else if (offsettable_memref_p (operand
))
19099 operand
= adjust_address (operand
, DImode
, 0);
19100 parts
[0] = operand
;
19101 parts
[1] = adjust_address (operand
, upper_mode
, 8);
19103 else if (GET_CODE (operand
) == CONST_DOUBLE
)
19108 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
19109 real_to_target (l
, &r
, mode
);
19111 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19112 if (HOST_BITS_PER_WIDE_INT
>= 64)
19115 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
19116 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
19119 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
19121 if (upper_mode
== SImode
)
19122 parts
[1] = gen_int_mode (l
[2], SImode
);
19123 else if (HOST_BITS_PER_WIDE_INT
>= 64)
19126 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
19127 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
19130 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
19133 gcc_unreachable ();
19140 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19141 Return false when normal moves are needed; true when all required
19142 insns have been emitted. Operands 2-4 contain the input values
19143 int the correct order; operands 5-7 contain the output values. */
19146 ix86_split_long_move (rtx operands
[])
19151 int collisions
= 0;
19152 enum machine_mode mode
= GET_MODE (operands
[0]);
19153 bool collisionparts
[4];
19155 /* The DFmode expanders may ask us to move double.
19156 For 64bit target this is single move. By hiding the fact
19157 here we simplify i386.md splitters. */
19158 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
19160 /* Optimize constant pool reference to immediates. This is used by
19161 fp moves, that force all constants to memory to allow combining. */
19163 if (MEM_P (operands
[1])
19164 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
19165 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
19166 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
19167 if (push_operand (operands
[0], VOIDmode
))
19169 operands
[0] = copy_rtx (operands
[0]);
19170 PUT_MODE (operands
[0], Pmode
);
19173 operands
[0] = gen_lowpart (DImode
, operands
[0]);
19174 operands
[1] = gen_lowpart (DImode
, operands
[1]);
19175 emit_move_insn (operands
[0], operands
[1]);
19179 /* The only non-offsettable memory we handle is push. */
19180 if (push_operand (operands
[0], VOIDmode
))
19183 gcc_assert (!MEM_P (operands
[0])
19184 || offsettable_memref_p (operands
[0]));
19186 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
19187 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
19189 /* When emitting push, take care for source operands on the stack. */
19190 if (push
&& MEM_P (operands
[1])
19191 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
19193 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
19195 /* Compensate for the stack decrement by 4. */
19196 if (!TARGET_64BIT
&& nparts
== 3
19197 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
19198 src_base
= plus_constant (src_base
, 4);
19200 /* src_base refers to the stack pointer and is
19201 automatically decreased by emitted push. */
19202 for (i
= 0; i
< nparts
; i
++)
19203 part
[1][i
] = change_address (part
[1][i
],
19204 GET_MODE (part
[1][i
]), src_base
);
19207 /* We need to do copy in the right order in case an address register
19208 of the source overlaps the destination. */
19209 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
19213 for (i
= 0; i
< nparts
; i
++)
19216 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
19217 if (collisionparts
[i
])
19221 /* Collision in the middle part can be handled by reordering. */
19222 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
19224 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
19225 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
19227 else if (collisions
== 1
19229 && (collisionparts
[1] || collisionparts
[2]))
19231 if (collisionparts
[1])
19233 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
19234 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
19238 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
19239 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
19243 /* If there are more collisions, we can't handle it by reordering.
19244 Do an lea to the last part and use only one colliding move. */
19245 else if (collisions
> 1)
19251 base
= part
[0][nparts
- 1];
19253 /* Handle the case when the last part isn't valid for lea.
19254 Happens in 64-bit mode storing the 12-byte XFmode. */
19255 if (GET_MODE (base
) != Pmode
)
19256 base
= gen_rtx_REG (Pmode
, REGNO (base
));
19258 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
19259 part
[1][0] = replace_equiv_address (part
[1][0], base
);
19260 for (i
= 1; i
< nparts
; i
++)
19262 tmp
= plus_constant (base
, UNITS_PER_WORD
* i
);
19263 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
19274 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
19275 emit_insn (gen_addsi3 (stack_pointer_rtx
,
19276 stack_pointer_rtx
, GEN_INT (-4)));
19277 emit_move_insn (part
[0][2], part
[1][2]);
19279 else if (nparts
== 4)
19281 emit_move_insn (part
[0][3], part
[1][3]);
19282 emit_move_insn (part
[0][2], part
[1][2]);
19287 /* In 64bit mode we don't have 32bit push available. In case this is
19288 register, it is OK - we will just use larger counterpart. We also
19289 retype memory - these comes from attempt to avoid REX prefix on
19290 moving of second half of TFmode value. */
19291 if (GET_MODE (part
[1][1]) == SImode
)
19293 switch (GET_CODE (part
[1][1]))
19296 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
19300 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
19304 gcc_unreachable ();
19307 if (GET_MODE (part
[1][0]) == SImode
)
19308 part
[1][0] = part
[1][1];
19311 emit_move_insn (part
[0][1], part
[1][1]);
19312 emit_move_insn (part
[0][0], part
[1][0]);
19316 /* Choose correct order to not overwrite the source before it is copied. */
19317 if ((REG_P (part
[0][0])
19318 && REG_P (part
[1][1])
19319 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
19321 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
19323 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
19325 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
19327 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
19329 operands
[2 + i
] = part
[0][j
];
19330 operands
[6 + i
] = part
[1][j
];
19335 for (i
= 0; i
< nparts
; i
++)
19337 operands
[2 + i
] = part
[0][i
];
19338 operands
[6 + i
] = part
[1][i
];
19342 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19343 if (optimize_insn_for_size_p ())
19345 for (j
= 0; j
< nparts
- 1; j
++)
19346 if (CONST_INT_P (operands
[6 + j
])
19347 && operands
[6 + j
] != const0_rtx
19348 && REG_P (operands
[2 + j
]))
19349 for (i
= j
; i
< nparts
- 1; i
++)
19350 if (CONST_INT_P (operands
[7 + i
])
19351 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
19352 operands
[7 + i
] = operands
[2 + j
];
19355 for (i
= 0; i
< nparts
; i
++)
19356 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
19361 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19362 left shift by a constant, either using a single shift or
19363 a sequence of add instructions. */
19366 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
19368 rtx (*insn
)(rtx
, rtx
, rtx
);
19371 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
19372 && !optimize_insn_for_size_p ()))
19374 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
19375 while (count
-- > 0)
19376 emit_insn (insn (operand
, operand
, operand
));
19380 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
19381 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
19386 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
19388 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
19389 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
19390 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
19392 rtx low
[2], high
[2];
19395 if (CONST_INT_P (operands
[2]))
19397 split_double_mode (mode
, operands
, 2, low
, high
);
19398 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
19400 if (count
>= half_width
)
19402 emit_move_insn (high
[0], low
[1]);
19403 emit_move_insn (low
[0], const0_rtx
);
19405 if (count
> half_width
)
19406 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
19410 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
19412 if (!rtx_equal_p (operands
[0], operands
[1]))
19413 emit_move_insn (operands
[0], operands
[1]);
19415 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
19416 ix86_expand_ashl_const (low
[0], count
, mode
);
19421 split_double_mode (mode
, operands
, 1, low
, high
);
19423 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
19425 if (operands
[1] == const1_rtx
)
19427 /* Assuming we've chosen a QImode capable registers, then 1 << N
19428 can be done with two 32/64-bit shifts, no branches, no cmoves. */
19429 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
19431 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
19433 ix86_expand_clear (low
[0]);
19434 ix86_expand_clear (high
[0]);
19435 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
19437 d
= gen_lowpart (QImode
, low
[0]);
19438 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
19439 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
19440 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
19442 d
= gen_lowpart (QImode
, high
[0]);
19443 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
19444 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
19445 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
19448 /* Otherwise, we can get the same results by manually performing
19449 a bit extract operation on bit 5/6, and then performing the two
19450 shifts. The two methods of getting 0/1 into low/high are exactly
19451 the same size. Avoiding the shift in the bit extract case helps
19452 pentium4 a bit; no one else seems to care much either way. */
19455 enum machine_mode half_mode
;
19456 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
19457 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
19458 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
19459 HOST_WIDE_INT bits
;
19462 if (mode
== DImode
)
19464 half_mode
= SImode
;
19465 gen_lshr3
= gen_lshrsi3
;
19466 gen_and3
= gen_andsi3
;
19467 gen_xor3
= gen_xorsi3
;
19472 half_mode
= DImode
;
19473 gen_lshr3
= gen_lshrdi3
;
19474 gen_and3
= gen_anddi3
;
19475 gen_xor3
= gen_xordi3
;
19479 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
19480 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
19482 x
= gen_lowpart (half_mode
, operands
[2]);
19483 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
19485 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
19486 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
19487 emit_move_insn (low
[0], high
[0]);
19488 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
19491 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
19492 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
19496 if (operands
[1] == constm1_rtx
)
19498 /* For -1 << N, we can avoid the shld instruction, because we
19499 know that we're shifting 0...31/63 ones into a -1. */
19500 emit_move_insn (low
[0], constm1_rtx
);
19501 if (optimize_insn_for_size_p ())
19502 emit_move_insn (high
[0], low
[0]);
19504 emit_move_insn (high
[0], constm1_rtx
);
19508 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
19510 if (!rtx_equal_p (operands
[0], operands
[1]))
19511 emit_move_insn (operands
[0], operands
[1]);
19513 split_double_mode (mode
, operands
, 1, low
, high
);
19514 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
19517 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
19519 if (TARGET_CMOVE
&& scratch
)
19521 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
19522 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
19524 ix86_expand_clear (scratch
);
19525 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
19529 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
19530 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
19532 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
19537 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
19539 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
19540 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
19541 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
19542 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
19544 rtx low
[2], high
[2];
19547 if (CONST_INT_P (operands
[2]))
19549 split_double_mode (mode
, operands
, 2, low
, high
);
19550 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
19552 if (count
== GET_MODE_BITSIZE (mode
) - 1)
19554 emit_move_insn (high
[0], high
[1]);
19555 emit_insn (gen_ashr3 (high
[0], high
[0],
19556 GEN_INT (half_width
- 1)));
19557 emit_move_insn (low
[0], high
[0]);
19560 else if (count
>= half_width
)
19562 emit_move_insn (low
[0], high
[1]);
19563 emit_move_insn (high
[0], low
[0]);
19564 emit_insn (gen_ashr3 (high
[0], high
[0],
19565 GEN_INT (half_width
- 1)));
19567 if (count
> half_width
)
19568 emit_insn (gen_ashr3 (low
[0], low
[0],
19569 GEN_INT (count
- half_width
)));
19573 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
19575 if (!rtx_equal_p (operands
[0], operands
[1]))
19576 emit_move_insn (operands
[0], operands
[1]);
19578 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
19579 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
19584 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
19586 if (!rtx_equal_p (operands
[0], operands
[1]))
19587 emit_move_insn (operands
[0], operands
[1]);
19589 split_double_mode (mode
, operands
, 1, low
, high
);
19591 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
19592 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
19594 if (TARGET_CMOVE
&& scratch
)
19596 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
19597 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
19599 emit_move_insn (scratch
, high
[0]);
19600 emit_insn (gen_ashr3 (scratch
, scratch
,
19601 GEN_INT (half_width
- 1)));
19602 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
19607 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
19608 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
19610 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
19616 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
19618 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
19619 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
19620 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
19621 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
19623 rtx low
[2], high
[2];
19626 if (CONST_INT_P (operands
[2]))
19628 split_double_mode (mode
, operands
, 2, low
, high
);
19629 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
19631 if (count
>= half_width
)
19633 emit_move_insn (low
[0], high
[1]);
19634 ix86_expand_clear (high
[0]);
19636 if (count
> half_width
)
19637 emit_insn (gen_lshr3 (low
[0], low
[0],
19638 GEN_INT (count
- half_width
)));
19642 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
19644 if (!rtx_equal_p (operands
[0], operands
[1]))
19645 emit_move_insn (operands
[0], operands
[1]);
19647 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
19648 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
19653 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
19655 if (!rtx_equal_p (operands
[0], operands
[1]))
19656 emit_move_insn (operands
[0], operands
[1]);
19658 split_double_mode (mode
, operands
, 1, low
, high
);
19660 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
19661 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
19663 if (TARGET_CMOVE
&& scratch
)
19665 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
19666 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
19668 ix86_expand_clear (scratch
);
19669 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
19674 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
19675 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
19677 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
19682 /* Predict just emitted jump instruction to be taken with probability PROB. */
19684 predict_jump (int prob
)
19686 rtx insn
= get_last_insn ();
19687 gcc_assert (JUMP_P (insn
));
19688 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (prob
));
19691 /* Helper function for the string operations below. Dest VARIABLE whether
19692 it is aligned to VALUE bytes. If true, jump to the label. */
19694 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
19696 rtx label
= gen_label_rtx ();
19697 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
19698 if (GET_MODE (variable
) == DImode
)
19699 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
19701 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
19702 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
19705 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
19707 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
19711 /* Adjust COUNTER by the VALUE. */
19713 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
19715 rtx (*gen_add
)(rtx
, rtx
, rtx
)
19716 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
19718 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
19721 /* Zero extend possibly SImode EXP to Pmode register. */
19723 ix86_zero_extend_to_Pmode (rtx exp
)
19726 if (GET_MODE (exp
) == VOIDmode
)
19727 return force_reg (Pmode
, exp
);
19728 if (GET_MODE (exp
) == Pmode
)
19729 return copy_to_mode_reg (Pmode
, exp
);
19730 r
= gen_reg_rtx (Pmode
);
19731 emit_insn (gen_zero_extendsidi2 (r
, exp
));
19735 /* Divide COUNTREG by SCALE. */
19737 scale_counter (rtx countreg
, int scale
)
19743 if (CONST_INT_P (countreg
))
19744 return GEN_INT (INTVAL (countreg
) / scale
);
19745 gcc_assert (REG_P (countreg
));
19747 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
19748 GEN_INT (exact_log2 (scale
)),
19749 NULL
, 1, OPTAB_DIRECT
);
19753 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19754 DImode for constant loop counts. */
19756 static enum machine_mode
19757 counter_mode (rtx count_exp
)
19759 if (GET_MODE (count_exp
) != VOIDmode
)
19760 return GET_MODE (count_exp
);
19761 if (!CONST_INT_P (count_exp
))
19763 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
19768 /* When SRCPTR is non-NULL, output simple loop to move memory
19769 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
19770 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
19771 equivalent loop to set memory by VALUE (supposed to be in MODE).
19773 The size is rounded down to whole number of chunk size moved at once.
19774 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
19778 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
19779 rtx destptr
, rtx srcptr
, rtx value
,
19780 rtx count
, enum machine_mode mode
, int unroll
,
19783 rtx out_label
, top_label
, iter
, tmp
;
19784 enum machine_mode iter_mode
= counter_mode (count
);
19785 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
19786 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
19792 top_label
= gen_label_rtx ();
19793 out_label
= gen_label_rtx ();
19794 iter
= gen_reg_rtx (iter_mode
);
19796 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
19797 NULL
, 1, OPTAB_DIRECT
);
19798 /* Those two should combine. */
19799 if (piece_size
== const1_rtx
)
19801 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
19803 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
19805 emit_move_insn (iter
, const0_rtx
);
19807 emit_label (top_label
);
19809 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
19810 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
19811 destmem
= change_address (destmem
, mode
, x_addr
);
19815 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
19816 srcmem
= change_address (srcmem
, mode
, y_addr
);
19818 /* When unrolling for chips that reorder memory reads and writes,
19819 we can save registers by using single temporary.
19820 Also using 4 temporaries is overkill in 32bit mode. */
19821 if (!TARGET_64BIT
&& 0)
19823 for (i
= 0; i
< unroll
; i
++)
19828 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
19830 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
19832 emit_move_insn (destmem
, srcmem
);
19838 gcc_assert (unroll
<= 4);
19839 for (i
= 0; i
< unroll
; i
++)
19841 tmpreg
[i
] = gen_reg_rtx (mode
);
19845 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
19847 emit_move_insn (tmpreg
[i
], srcmem
);
19849 for (i
= 0; i
< unroll
; i
++)
19854 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
19856 emit_move_insn (destmem
, tmpreg
[i
]);
19861 for (i
= 0; i
< unroll
; i
++)
19865 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
19866 emit_move_insn (destmem
, value
);
19869 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
19870 true, OPTAB_LIB_WIDEN
);
19872 emit_move_insn (iter
, tmp
);
19874 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
19876 if (expected_size
!= -1)
19878 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
19879 if (expected_size
== 0)
19881 else if (expected_size
> REG_BR_PROB_BASE
)
19882 predict_jump (REG_BR_PROB_BASE
- 1);
19884 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
19887 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
19888 iter
= ix86_zero_extend_to_Pmode (iter
);
19889 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
19890 true, OPTAB_LIB_WIDEN
);
19891 if (tmp
!= destptr
)
19892 emit_move_insn (destptr
, tmp
);
19895 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
19896 true, OPTAB_LIB_WIDEN
);
19898 emit_move_insn (srcptr
, tmp
);
19900 emit_label (out_label
);
19903 /* Output "rep; mov" instruction.
19904 Arguments have same meaning as for previous function */
19906 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
19907 rtx destptr
, rtx srcptr
,
19909 enum machine_mode mode
)
19915 /* If the size is known, it is shorter to use rep movs. */
19916 if (mode
== QImode
&& CONST_INT_P (count
)
19917 && !(INTVAL (count
) & 3))
19920 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
19921 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
19922 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
19923 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
19924 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
19925 if (mode
!= QImode
)
19927 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
19928 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
19929 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
19930 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
19931 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
19932 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
19936 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
19937 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
19939 if (CONST_INT_P (count
))
19941 count
= GEN_INT (INTVAL (count
)
19942 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
19943 destmem
= shallow_copy_rtx (destmem
);
19944 srcmem
= shallow_copy_rtx (srcmem
);
19945 set_mem_size (destmem
, count
);
19946 set_mem_size (srcmem
, count
);
19950 if (MEM_SIZE (destmem
))
19951 set_mem_size (destmem
, NULL_RTX
);
19952 if (MEM_SIZE (srcmem
))
19953 set_mem_size (srcmem
, NULL_RTX
);
19955 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
19959 /* Output "rep; stos" instruction.
19960 Arguments have same meaning as for previous function */
19962 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
19963 rtx count
, enum machine_mode mode
,
19969 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
19970 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
19971 value
= force_reg (mode
, gen_lowpart (mode
, value
));
19972 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
19973 if (mode
!= QImode
)
19975 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
19976 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
19977 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
19980 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
19981 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
19983 count
= GEN_INT (INTVAL (count
)
19984 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
19985 destmem
= shallow_copy_rtx (destmem
);
19986 set_mem_size (destmem
, count
);
19988 else if (MEM_SIZE (destmem
))
19989 set_mem_size (destmem
, NULL_RTX
);
19990 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
19994 emit_strmov (rtx destmem
, rtx srcmem
,
19995 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
19997 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
19998 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
19999 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
20002 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
20004 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
20005 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
20008 if (CONST_INT_P (count
))
20010 HOST_WIDE_INT countval
= INTVAL (count
);
20013 if ((countval
& 0x10) && max_size
> 16)
20017 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
20018 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
20021 gcc_unreachable ();
20024 if ((countval
& 0x08) && max_size
> 8)
20027 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
20030 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
20031 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
20035 if ((countval
& 0x04) && max_size
> 4)
20037 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
20040 if ((countval
& 0x02) && max_size
> 2)
20042 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
20045 if ((countval
& 0x01) && max_size
> 1)
20047 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
20054 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
20055 count
, 1, OPTAB_DIRECT
);
20056 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
20057 count
, QImode
, 1, 4);
20061 /* When there are stringops, we can cheaply increase dest and src pointers.
20062 Otherwise we save code size by maintaining offset (zero is readily
20063 available from preceding rep operation) and using x86 addressing modes.
20065 if (TARGET_SINGLE_STRINGOP
)
20069 rtx label
= ix86_expand_aligntest (count
, 4, true);
20070 src
= change_address (srcmem
, SImode
, srcptr
);
20071 dest
= change_address (destmem
, SImode
, destptr
);
20072 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
20073 emit_label (label
);
20074 LABEL_NUSES (label
) = 1;
20078 rtx label
= ix86_expand_aligntest (count
, 2, true);
20079 src
= change_address (srcmem
, HImode
, srcptr
);
20080 dest
= change_address (destmem
, HImode
, destptr
);
20081 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
20082 emit_label (label
);
20083 LABEL_NUSES (label
) = 1;
20087 rtx label
= ix86_expand_aligntest (count
, 1, true);
20088 src
= change_address (srcmem
, QImode
, srcptr
);
20089 dest
= change_address (destmem
, QImode
, destptr
);
20090 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
20091 emit_label (label
);
20092 LABEL_NUSES (label
) = 1;
20097 rtx offset
= force_reg (Pmode
, const0_rtx
);
20102 rtx label
= ix86_expand_aligntest (count
, 4, true);
20103 src
= change_address (srcmem
, SImode
, srcptr
);
20104 dest
= change_address (destmem
, SImode
, destptr
);
20105 emit_move_insn (dest
, src
);
20106 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
20107 true, OPTAB_LIB_WIDEN
);
20109 emit_move_insn (offset
, tmp
);
20110 emit_label (label
);
20111 LABEL_NUSES (label
) = 1;
20115 rtx label
= ix86_expand_aligntest (count
, 2, true);
20116 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
20117 src
= change_address (srcmem
, HImode
, tmp
);
20118 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
20119 dest
= change_address (destmem
, HImode
, tmp
);
20120 emit_move_insn (dest
, src
);
20121 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
20122 true, OPTAB_LIB_WIDEN
);
20124 emit_move_insn (offset
, tmp
);
20125 emit_label (label
);
20126 LABEL_NUSES (label
) = 1;
20130 rtx label
= ix86_expand_aligntest (count
, 1, true);
20131 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
20132 src
= change_address (srcmem
, QImode
, tmp
);
20133 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
20134 dest
= change_address (destmem
, QImode
, tmp
);
20135 emit_move_insn (dest
, src
);
20136 emit_label (label
);
20137 LABEL_NUSES (label
) = 1;
20142 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
20144 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
20145 rtx count
, int max_size
)
20148 expand_simple_binop (counter_mode (count
), AND
, count
,
20149 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
20150 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
20151 gen_lowpart (QImode
, value
), count
, QImode
,
20155 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
20157 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
20161 if (CONST_INT_P (count
))
20163 HOST_WIDE_INT countval
= INTVAL (count
);
20166 if ((countval
& 0x10) && max_size
> 16)
20170 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
20171 emit_insn (gen_strset (destptr
, dest
, value
));
20172 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
20173 emit_insn (gen_strset (destptr
, dest
, value
));
20176 gcc_unreachable ();
20179 if ((countval
& 0x08) && max_size
> 8)
20183 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
20184 emit_insn (gen_strset (destptr
, dest
, value
));
20188 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
20189 emit_insn (gen_strset (destptr
, dest
, value
));
20190 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
20191 emit_insn (gen_strset (destptr
, dest
, value
));
20195 if ((countval
& 0x04) && max_size
> 4)
20197 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
20198 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
20201 if ((countval
& 0x02) && max_size
> 2)
20203 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
20204 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
20207 if ((countval
& 0x01) && max_size
> 1)
20209 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
20210 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
20217 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
20222 rtx label
= ix86_expand_aligntest (count
, 16, true);
20225 dest
= change_address (destmem
, DImode
, destptr
);
20226 emit_insn (gen_strset (destptr
, dest
, value
));
20227 emit_insn (gen_strset (destptr
, dest
, value
));
20231 dest
= change_address (destmem
, SImode
, destptr
);
20232 emit_insn (gen_strset (destptr
, dest
, value
));
20233 emit_insn (gen_strset (destptr
, dest
, value
));
20234 emit_insn (gen_strset (destptr
, dest
, value
));
20235 emit_insn (gen_strset (destptr
, dest
, value
));
20237 emit_label (label
);
20238 LABEL_NUSES (label
) = 1;
20242 rtx label
= ix86_expand_aligntest (count
, 8, true);
20245 dest
= change_address (destmem
, DImode
, destptr
);
20246 emit_insn (gen_strset (destptr
, dest
, value
));
20250 dest
= change_address (destmem
, SImode
, destptr
);
20251 emit_insn (gen_strset (destptr
, dest
, value
));
20252 emit_insn (gen_strset (destptr
, dest
, value
));
20254 emit_label (label
);
20255 LABEL_NUSES (label
) = 1;
20259 rtx label
= ix86_expand_aligntest (count
, 4, true);
20260 dest
= change_address (destmem
, SImode
, destptr
);
20261 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
20262 emit_label (label
);
20263 LABEL_NUSES (label
) = 1;
20267 rtx label
= ix86_expand_aligntest (count
, 2, true);
20268 dest
= change_address (destmem
, HImode
, destptr
);
20269 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
20270 emit_label (label
);
20271 LABEL_NUSES (label
) = 1;
20275 rtx label
= ix86_expand_aligntest (count
, 1, true);
20276 dest
= change_address (destmem
, QImode
, destptr
);
20277 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
20278 emit_label (label
);
20279 LABEL_NUSES (label
) = 1;
20283 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
20284 DESIRED_ALIGNMENT. */
20286 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
20287 rtx destptr
, rtx srcptr
, rtx count
,
20288 int align
, int desired_alignment
)
20290 if (align
<= 1 && desired_alignment
> 1)
20292 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
20293 srcmem
= change_address (srcmem
, QImode
, srcptr
);
20294 destmem
= change_address (destmem
, QImode
, destptr
);
20295 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
20296 ix86_adjust_counter (count
, 1);
20297 emit_label (label
);
20298 LABEL_NUSES (label
) = 1;
20300 if (align
<= 2 && desired_alignment
> 2)
20302 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
20303 srcmem
= change_address (srcmem
, HImode
, srcptr
);
20304 destmem
= change_address (destmem
, HImode
, destptr
);
20305 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
20306 ix86_adjust_counter (count
, 2);
20307 emit_label (label
);
20308 LABEL_NUSES (label
) = 1;
20310 if (align
<= 4 && desired_alignment
> 4)
20312 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
20313 srcmem
= change_address (srcmem
, SImode
, srcptr
);
20314 destmem
= change_address (destmem
, SImode
, destptr
);
20315 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
20316 ix86_adjust_counter (count
, 4);
20317 emit_label (label
);
20318 LABEL_NUSES (label
) = 1;
20320 gcc_assert (desired_alignment
<= 8);
20323 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
20324 ALIGN_BYTES is how many bytes need to be copied. */
20326 expand_constant_movmem_prologue (rtx dst
, rtx
*srcp
, rtx destreg
, rtx srcreg
,
20327 int desired_align
, int align_bytes
)
20330 rtx src_size
, dst_size
;
20332 int src_align_bytes
= get_mem_align_offset (src
, desired_align
* BITS_PER_UNIT
);
20333 if (src_align_bytes
>= 0)
20334 src_align_bytes
= desired_align
- src_align_bytes
;
20335 src_size
= MEM_SIZE (src
);
20336 dst_size
= MEM_SIZE (dst
);
20337 if (align_bytes
& 1)
20339 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
20340 src
= adjust_automodify_address_nv (src
, QImode
, srcreg
, 0);
20342 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
20344 if (align_bytes
& 2)
20346 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
20347 src
= adjust_automodify_address_nv (src
, HImode
, srcreg
, off
);
20348 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
20349 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
20350 if (src_align_bytes
>= 0
20351 && (src_align_bytes
& 1) == (align_bytes
& 1)
20352 && MEM_ALIGN (src
) < 2 * BITS_PER_UNIT
)
20353 set_mem_align (src
, 2 * BITS_PER_UNIT
);
20355 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
20357 if (align_bytes
& 4)
20359 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
20360 src
= adjust_automodify_address_nv (src
, SImode
, srcreg
, off
);
20361 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
20362 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
20363 if (src_align_bytes
>= 0)
20365 unsigned int src_align
= 0;
20366 if ((src_align_bytes
& 3) == (align_bytes
& 3))
20368 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
20370 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
20371 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
20374 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
20376 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
20377 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
, off
);
20378 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
20379 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
20380 if (src_align_bytes
>= 0)
20382 unsigned int src_align
= 0;
20383 if ((src_align_bytes
& 7) == (align_bytes
& 7))
20385 else if ((src_align_bytes
& 3) == (align_bytes
& 3))
20387 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
20389 if (src_align
> (unsigned int) desired_align
)
20390 src_align
= desired_align
;
20391 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
20392 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
20395 set_mem_size (dst
, GEN_INT (INTVAL (dst_size
) - align_bytes
));
20397 set_mem_size (dst
, GEN_INT (INTVAL (src_size
) - align_bytes
));
20402 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
20403 DESIRED_ALIGNMENT. */
20405 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
20406 int align
, int desired_alignment
)
20408 if (align
<= 1 && desired_alignment
> 1)
20410 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
20411 destmem
= change_address (destmem
, QImode
, destptr
);
20412 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
20413 ix86_adjust_counter (count
, 1);
20414 emit_label (label
);
20415 LABEL_NUSES (label
) = 1;
20417 if (align
<= 2 && desired_alignment
> 2)
20419 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
20420 destmem
= change_address (destmem
, HImode
, destptr
);
20421 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
20422 ix86_adjust_counter (count
, 2);
20423 emit_label (label
);
20424 LABEL_NUSES (label
) = 1;
20426 if (align
<= 4 && desired_alignment
> 4)
20428 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
20429 destmem
= change_address (destmem
, SImode
, destptr
);
20430 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
20431 ix86_adjust_counter (count
, 4);
20432 emit_label (label
);
20433 LABEL_NUSES (label
) = 1;
20435 gcc_assert (desired_alignment
<= 8);
20438 /* Set enough from DST to align DST known to by aligned by ALIGN to
20439 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
20441 expand_constant_setmem_prologue (rtx dst
, rtx destreg
, rtx value
,
20442 int desired_align
, int align_bytes
)
20445 rtx dst_size
= MEM_SIZE (dst
);
20446 if (align_bytes
& 1)
20448 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
20450 emit_insn (gen_strset (destreg
, dst
,
20451 gen_lowpart (QImode
, value
)));
20453 if (align_bytes
& 2)
20455 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
20456 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
20457 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
20459 emit_insn (gen_strset (destreg
, dst
,
20460 gen_lowpart (HImode
, value
)));
20462 if (align_bytes
& 4)
20464 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
20465 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
20466 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
20468 emit_insn (gen_strset (destreg
, dst
,
20469 gen_lowpart (SImode
, value
)));
20471 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
20472 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
20473 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
20475 set_mem_size (dst
, GEN_INT (INTVAL (dst_size
) - align_bytes
));
20479 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20480 static enum stringop_alg
20481 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
20482 int *dynamic_check
)
20484 const struct stringop_algs
* algs
;
20485 bool optimize_for_speed
;
20486 /* Algorithms using the rep prefix want at least edi and ecx;
20487 additionally, memset wants eax and memcpy wants esi. Don't
20488 consider such algorithms if the user has appropriated those
20489 registers for their own purposes. */
20490 bool rep_prefix_usable
= !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
20492 ? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
20494 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20495 || (alg != rep_prefix_1_byte \
20496 && alg != rep_prefix_4_byte \
20497 && alg != rep_prefix_8_byte))
20498 const struct processor_costs
*cost
;
20500 /* Even if the string operation call is cold, we still might spend a lot
20501 of time processing large blocks. */
20502 if (optimize_function_for_size_p (cfun
)
20503 || (optimize_insn_for_size_p ()
20504 && expected_size
!= -1 && expected_size
< 256))
20505 optimize_for_speed
= false;
20507 optimize_for_speed
= true;
20509 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
20511 *dynamic_check
= -1;
20513 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
20515 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
20516 if (stringop_alg
!= no_stringop
&& ALG_USABLE_P (stringop_alg
))
20517 return stringop_alg
;
20518 /* rep; movq or rep; movl is the smallest variant. */
20519 else if (!optimize_for_speed
)
20521 if (!count
|| (count
& 3))
20522 return rep_prefix_usable
? rep_prefix_1_byte
: loop_1_byte
;
20524 return rep_prefix_usable
? rep_prefix_4_byte
: loop
;
20526 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
20528 else if (expected_size
!= -1 && expected_size
< 4)
20529 return loop_1_byte
;
20530 else if (expected_size
!= -1)
20533 enum stringop_alg alg
= libcall
;
20534 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
20536 /* We get here if the algorithms that were not libcall-based
20537 were rep-prefix based and we are unable to use rep prefixes
20538 based on global register usage. Break out of the loop and
20539 use the heuristic below. */
20540 if (algs
->size
[i
].max
== 0)
20542 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
20544 enum stringop_alg candidate
= algs
->size
[i
].alg
;
20546 if (candidate
!= libcall
&& ALG_USABLE_P (candidate
))
20548 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20549 last non-libcall inline algorithm. */
20550 if (TARGET_INLINE_ALL_STRINGOPS
)
20552 /* When the current size is best to be copied by a libcall,
20553 but we are still forced to inline, run the heuristic below
20554 that will pick code for medium sized blocks. */
20555 if (alg
!= libcall
)
20559 else if (ALG_USABLE_P (candidate
))
20563 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
|| !rep_prefix_usable
);
20565 /* When asked to inline the call anyway, try to pick meaningful choice.
20566 We look for maximal size of block that is faster to copy by hand and
20567 take blocks of at most of that size guessing that average size will
20568 be roughly half of the block.
20570 If this turns out to be bad, we might simply specify the preferred
20571 choice in ix86_costs. */
20572 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
20573 && (algs
->unknown_size
== libcall
|| !ALG_USABLE_P (algs
->unknown_size
)))
20576 enum stringop_alg alg
;
20578 bool any_alg_usable_p
= true;
20580 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
20582 enum stringop_alg candidate
= algs
->size
[i
].alg
;
20583 any_alg_usable_p
= any_alg_usable_p
&& ALG_USABLE_P (candidate
);
20585 if (candidate
!= libcall
&& candidate
20586 && ALG_USABLE_P (candidate
))
20587 max
= algs
->size
[i
].max
;
20589 /* If there aren't any usable algorithms, then recursing on
20590 smaller sizes isn't going to find anything. Just return the
20591 simple byte-at-a-time copy loop. */
20592 if (!any_alg_usable_p
)
20594 /* Pick something reasonable. */
20595 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
20596 *dynamic_check
= 128;
20597 return loop_1_byte
;
20601 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
20602 gcc_assert (*dynamic_check
== -1);
20603 gcc_assert (alg
!= libcall
);
20604 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
20605 *dynamic_check
= max
;
20608 return ALG_USABLE_P (algs
->unknown_size
) ? algs
->unknown_size
: libcall
;
20609 #undef ALG_USABLE_P
20612 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20613 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20615 decide_alignment (int align
,
20616 enum stringop_alg alg
,
20619 int desired_align
= 0;
20623 gcc_unreachable ();
20625 case unrolled_loop
:
20626 desired_align
= GET_MODE_SIZE (Pmode
);
20628 case rep_prefix_8_byte
:
20631 case rep_prefix_4_byte
:
20632 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
20633 copying whole cacheline at once. */
20634 if (TARGET_PENTIUMPRO
)
20639 case rep_prefix_1_byte
:
20640 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
20641 copying whole cacheline at once. */
20642 if (TARGET_PENTIUMPRO
)
20656 if (desired_align
< align
)
20657 desired_align
= align
;
20658 if (expected_size
!= -1 && expected_size
< 4)
20659 desired_align
= align
;
20660 return desired_align
;
/* Return the smallest power of 2 that is strictly greater than VAL.
   NOTE(review): the loop body was dropped by the extraction; restored
   from upstream GCC — confirm against the original file.  */
static int
smallest_pow2_greater_than (int val)
{
  int pow2 = 1;

  while (pow2 <= val)
    pow2 <<= 1;
  return pow2;
}
20673 /* Expand string move (memcpy) operation. Use i386 string operations when
20674 profitable. expand_setmem contains similar code. The code depends upon
20675 architecture, block size and alignment, but always has the same
20678 1) Prologue guard: Conditional that jumps up to epilogues for small
20679 blocks that can be handled by epilogue alone. This is faster but
20680 also needed for correctness, since prologue assume the block is larger
20681 than the desired alignment.
20683 Optional dynamic check for size and libcall for large
20684 blocks is emitted here too, with -minline-stringops-dynamically.
20686 2) Prologue: copy first few bytes in order to get destination aligned
20687 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
20688 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
20689 We emit either a jump tree on power of two sized blocks, or a byte loop.
20691 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20692 with specified algorithm.
20694 4) Epilogue: code copying tail of the block that is too small to be
20695 handled by main body (or up to size guarded by prologue guard). */
20698 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
20699 rtx expected_align_exp
, rtx expected_size_exp
)
20705 rtx jump_around_label
= NULL
;
20706 HOST_WIDE_INT align
= 1;
20707 unsigned HOST_WIDE_INT count
= 0;
20708 HOST_WIDE_INT expected_size
= -1;
20709 int size_needed
= 0, epilogue_size_needed
;
20710 int desired_align
= 0, align_bytes
= 0;
20711 enum stringop_alg alg
;
20713 bool need_zero_guard
= false;
20715 if (CONST_INT_P (align_exp
))
20716 align
= INTVAL (align_exp
);
20717 /* i386 can do misaligned access on reasonably increased cost. */
20718 if (CONST_INT_P (expected_align_exp
)
20719 && INTVAL (expected_align_exp
) > align
)
20720 align
= INTVAL (expected_align_exp
);
20721 /* ALIGN is the minimum of destination and source alignment, but we care here
20722 just about destination alignment. */
20723 else if (MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
20724 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
20726 if (CONST_INT_P (count_exp
))
20727 count
= expected_size
= INTVAL (count_exp
);
20728 if (CONST_INT_P (expected_size_exp
) && count
== 0)
20729 expected_size
= INTVAL (expected_size_exp
);
20731 /* Make sure we don't need to care about overflow later on. */
20732 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
20735 /* Step 0: Decide on preferred algorithm, desired alignment and
20736 size of chunks to be copied by main loop. */
20738 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
20739 desired_align
= decide_alignment (align
, alg
, expected_size
);
20741 if (!TARGET_ALIGN_STRINGOPS
)
20742 align
= desired_align
;
20744 if (alg
== libcall
)
20746 gcc_assert (alg
!= no_stringop
);
20748 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
20749 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
20750 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
20755 gcc_unreachable ();
20757 need_zero_guard
= true;
20758 size_needed
= GET_MODE_SIZE (Pmode
);
20760 case unrolled_loop
:
20761 need_zero_guard
= true;
20762 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
20764 case rep_prefix_8_byte
:
20767 case rep_prefix_4_byte
:
20770 case rep_prefix_1_byte
:
20774 need_zero_guard
= true;
20779 epilogue_size_needed
= size_needed
;
20781 /* Step 1: Prologue guard. */
20783 /* Alignment code needs count to be in register. */
20784 if (CONST_INT_P (count_exp
) && desired_align
> align
)
20786 if (INTVAL (count_exp
) > desired_align
20787 && INTVAL (count_exp
) > size_needed
)
20790 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
20791 if (align_bytes
<= 0)
20794 align_bytes
= desired_align
- align_bytes
;
20796 if (align_bytes
== 0)
20797 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
20799 gcc_assert (desired_align
>= 1 && align
>= 1);
20801 /* Ensure that alignment prologue won't copy past end of block. */
20802 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
20804 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
20805 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
20806 Make sure it is power of 2. */
20807 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
20811 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
20813 /* If main algorithm works on QImode, no epilogue is needed.
20814 For small sizes just don't align anything. */
20815 if (size_needed
== 1)
20816 desired_align
= align
;
20823 label
= gen_label_rtx ();
20824 emit_cmp_and_jump_insns (count_exp
,
20825 GEN_INT (epilogue_size_needed
),
20826 LTU
, 0, counter_mode (count_exp
), 1, label
);
20827 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
20828 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
20830 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
20834 /* Emit code to decide on runtime whether library call or inline should be
20836 if (dynamic_check
!= -1)
20838 if (CONST_INT_P (count_exp
))
20840 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
20842 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
20843 count_exp
= const0_rtx
;
20849 rtx hot_label
= gen_label_rtx ();
20850 jump_around_label
= gen_label_rtx ();
20851 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
20852 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
20853 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
20854 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
20855 emit_jump (jump_around_label
);
20856 emit_label (hot_label
);
20860 /* Step 2: Alignment prologue. */
20862 if (desired_align
> align
)
20864 if (align_bytes
== 0)
20866 /* Except for the first move in epilogue, we no longer know
20867 constant offset in aliasing info. It don't seems to worth
20868 the pain to maintain it for the first move, so throw away
20870 src
= change_address (src
, BLKmode
, srcreg
);
20871 dst
= change_address (dst
, BLKmode
, destreg
);
20872 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
20877 /* If we know how many bytes need to be stored before dst is
20878 sufficiently aligned, maintain aliasing info accurately. */
20879 dst
= expand_constant_movmem_prologue (dst
, &src
, destreg
, srcreg
,
20880 desired_align
, align_bytes
);
20881 count_exp
= plus_constant (count_exp
, -align_bytes
);
20882 count
-= align_bytes
;
20884 if (need_zero_guard
20885 && (count
< (unsigned HOST_WIDE_INT
) size_needed
20886 || (align_bytes
== 0
20887 && count
< ((unsigned HOST_WIDE_INT
) size_needed
20888 + desired_align
- align
))))
20890 /* It is possible that we copied enough so the main loop will not
20892 gcc_assert (size_needed
> 1);
20893 if (label
== NULL_RTX
)
20894 label
= gen_label_rtx ();
20895 emit_cmp_and_jump_insns (count_exp
,
20896 GEN_INT (size_needed
),
20897 LTU
, 0, counter_mode (count_exp
), 1, label
);
20898 if (expected_size
== -1
20899 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
20900 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
20902 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
20905 if (label
&& size_needed
== 1)
20907 emit_label (label
);
20908 LABEL_NUSES (label
) = 1;
20910 epilogue_size_needed
= 1;
20912 else if (label
== NULL_RTX
)
20913 epilogue_size_needed
= size_needed
;
20915 /* Step 3: Main loop. */
20921 gcc_unreachable ();
20923 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
20924 count_exp
, QImode
, 1, expected_size
);
20927 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
20928 count_exp
, Pmode
, 1, expected_size
);
20930 case unrolled_loop
:
20931 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
20932 registers for 4 temporaries anyway. */
20933 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
20934 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
20937 case rep_prefix_8_byte
:
20938 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
20941 case rep_prefix_4_byte
:
20942 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
20945 case rep_prefix_1_byte
:
20946 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
20950 /* Adjust properly the offset of src and dest memory for aliasing. */
20951 if (CONST_INT_P (count_exp
))
20953 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
20954 (count
/ size_needed
) * size_needed
);
20955 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
20956 (count
/ size_needed
) * size_needed
);
20960 src
= change_address (src
, BLKmode
, srcreg
);
20961 dst
= change_address (dst
, BLKmode
, destreg
);
20964 /* Step 4: Epilogue to copy the remaining bytes. */
20968 /* When the main loop is done, COUNT_EXP might hold original count,
20969 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
20970 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
20971 bytes. Compensate if needed. */
20973 if (size_needed
< epilogue_size_needed
)
20976 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
20977 GEN_INT (size_needed
- 1), count_exp
, 1,
20979 if (tmp
!= count_exp
)
20980 emit_move_insn (count_exp
, tmp
);
20982 emit_label (label
);
20983 LABEL_NUSES (label
) = 1;
20986 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
20987 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
20988 epilogue_size_needed
);
20989 if (jump_around_label
)
20990 emit_label (jump_around_label
);
20994 /* Helper function for memcpy. For QImode value 0xXY produce
20995 0xXYXYXYXY of wide specified by MODE. This is essentially
20996 a * 0x10101010, but we can do slightly better than
20997 synth_mult by unwinding the sequence by hand on CPUs with
/* NOTE(review): corrupted extraction -- statements are split across lines
   and some lines (return type, braces, a declaration of 'tmp', the
   fallback paths) are missing.  Code left byte-for-byte untouched;
   restore from the pristine original before building.

   Purpose (from visible code): given VAL, produce a register of MODE
   (SImode or DImode -- asserted below) whose every byte equals the low
   byte of VAL, i.e. 0xXY -> 0xXYXYXYXY.  Constants are folded at compile
   time; otherwise either a multiply by a promoted 0x01...01 constant or
   a shift/insv + IOR sequence is emitted, chosen by the ix86_cost model
   and TARGET_PARTIAL_REG_STALL.  */
21000 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
21002 enum machine_mode valmode
= GET_MODE (val
);
21004 int nops
= mode
== DImode
? 3 : 2;
21006 gcc_assert (mode
== SImode
|| mode
== DImode
);
21007 if (val
== const0_rtx
)
21008 return copy_to_mode_reg (mode
, const0_rtx
);
21009 if (CONST_INT_P (val
))
21011 HOST_WIDE_INT v
= INTVAL (val
) & 255;
21015 if (mode
== DImode
)
21016 v
|= (v
<< 16) << 16;
21017 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
21020 if (valmode
== VOIDmode
)
21022 if (valmode
!= QImode
)
21023 val
= gen_lowpart (QImode
, val
);
21024 if (mode
== QImode
)
21026 if (!TARGET_PARTIAL_REG_STALL
)
21028 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
21029 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
21030 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
21031 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
21033 rtx reg
= convert_modes (mode
, QImode
, val
, true);
21034 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
21035 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
21040 rtx reg
= convert_modes (mode
, QImode
, val
, true);
21042 if (!TARGET_PARTIAL_REG_STALL
)
21043 if (mode
== SImode
)
21044 emit_insn (gen_movsi_insv_1 (reg
, reg
));
21046 emit_insn (gen_movdi_insv_1 (reg
, reg
));
21049 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
21050 NULL
, 1, OPTAB_DIRECT
);
21052 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
21054 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
21055 NULL
, 1, OPTAB_DIRECT
);
21056 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
21057 if (mode
== SImode
)
21059 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
21060 NULL
, 1, OPTAB_DIRECT
);
21061 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
21066 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
21067 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
21068 alignment from ALIGN to DESIRED_ALIGN. */
/* NOTE(review): corrupted extraction -- the return type, the opening
   brace, the declaration of 'promoted_val', and the leading TARGET_64BIT
   condition of the first 'if' are missing.  Code left byte-for-byte
   untouched; restore from the pristine original before building.

   Purpose (from visible code): duplicate VAL (via promote_duplicated_reg)
   into the widest mode that the main loop (SIZE_NEEDED-byte chunks) or
   the alignment prologue (raising ALIGN to DESIRED_ALIGN) will need:
   DImode, SImode, HImode, or VAL unchanged for the byte-only case.  */
21070 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
21075 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
21076 promoted_val
= promote_duplicated_reg (DImode
, val
);
21077 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
21078 promoted_val
= promote_duplicated_reg (SImode
, val
);
21079 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
21080 promoted_val
= promote_duplicated_reg (HImode
, val
);
21082 promoted_val
= val
;
21084 return promoted_val
;
21087 /* Expand string clear operation (bzero). Use i386 string operations when
21088 profitable. See expand_movmem comment for explanation of individual
21089 steps performed. */
/* NOTE(review): corrupted extraction -- statements are split across
   lines and some lines are missing (return type, declarations of
   destreg/label/dynamic_check, the 'switch (alg)' headers, 'break;'
   statements, several braces and return statements).  Code is left
   byte-for-byte untouched; restore from the pristine original before
   building.

   Purpose (from visible code): expand a block-set (setmem/memset),
   mirroring ix86_expand_movmem: Step 0 chooses algorithm and alignment
   (decide_alg with memset==true), Step 1 emits prologue guards plus an
   optional runtime dispatch to set_storage_via_libcall, Step 2 the
   alignment prologue (the fill value is byte-duplicated into a wide
   register via promote_duplicated_reg_to_size; a non-constant value may
   force a byte-loop epilogue, see force_loopy_epilogue), Step 3 the main
   store loop (loop, unrolled loop, or rep-stos variants), Step 4 the
   epilogue for the remaining bytes.  */
21091 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
21092 rtx expected_align_exp
, rtx expected_size_exp
)
21097 rtx jump_around_label
= NULL
;
21098 HOST_WIDE_INT align
= 1;
21099 unsigned HOST_WIDE_INT count
= 0;
21100 HOST_WIDE_INT expected_size
= -1;
21101 int size_needed
= 0, epilogue_size_needed
;
21102 int desired_align
= 0, align_bytes
= 0;
21103 enum stringop_alg alg
;
21104 rtx promoted_val
= NULL
;
21105 bool force_loopy_epilogue
= false;
21107 bool need_zero_guard
= false;
21109 if (CONST_INT_P (align_exp
))
21110 align
= INTVAL (align_exp
);
21111 /* i386 can do misaligned access on reasonably increased cost. */
21112 if (CONST_INT_P (expected_align_exp
)
21113 && INTVAL (expected_align_exp
) > align
)
21114 align
= INTVAL (expected_align_exp
);
21115 if (CONST_INT_P (count_exp
))
21116 count
= expected_size
= INTVAL (count_exp
);
21117 if (CONST_INT_P (expected_size_exp
) && count
== 0)
21118 expected_size
= INTVAL (expected_size_exp
);
21120 /* Make sure we don't need to care about overflow later on. */
21121 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
21124 /* Step 0: Decide on preferred algorithm, desired alignment and
21125 size of chunks to be copied by main loop. */
21127 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
21128 desired_align
= decide_alignment (align
, alg
, expected_size
);
21130 if (!TARGET_ALIGN_STRINGOPS
)
21131 align
= desired_align
;
21133 if (alg
== libcall
)
21135 gcc_assert (alg
!= no_stringop
);
21137 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
21138 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
/* NOTE(review): the 'switch (alg)' header and several 'case'/'break' lines
   of the algorithm dispatch below are missing from this extraction.  */
21143 gcc_unreachable ();
21145 need_zero_guard
= true;
21146 size_needed
= GET_MODE_SIZE (Pmode
);
21148 case unrolled_loop
:
21149 need_zero_guard
= true;
21150 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
21152 case rep_prefix_8_byte
:
21155 case rep_prefix_4_byte
:
21158 case rep_prefix_1_byte
:
21162 need_zero_guard
= true;
21166 epilogue_size_needed
= size_needed
;
21168 /* Step 1: Prologue guard. */
21170 /* Alignment code needs count to be in register. */
21171 if (CONST_INT_P (count_exp
) && desired_align
> align
)
21173 if (INTVAL (count_exp
) > desired_align
21174 && INTVAL (count_exp
) > size_needed
)
21177 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
21178 if (align_bytes
<= 0)
21181 align_bytes
= desired_align
- align_bytes
;
21183 if (align_bytes
== 0)
21185 enum machine_mode mode
= SImode
;
21186 if (TARGET_64BIT
&& (count
& ~0xffffffff))
21188 count_exp
= force_reg (mode
, count_exp
);
21191 /* Do the cheap promotion to allow better CSE across the
21192 main loop and epilogue (ie one load of the big constant in the
21193 front of all code. */
21194 if (CONST_INT_P (val_exp
))
21195 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
21196 desired_align
, align
);
21197 /* Ensure that alignment prologue won't copy past end of block. */
21198 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
21200 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
21201 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21202 Make sure it is power of 2. */
21203 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
21205 /* To improve performance of small blocks, we jump around the VAL
21206 promoting mode. This mean that if the promoted VAL is not constant,
21207 we might not use it in the epilogue and have to use byte
21209 if (epilogue_size_needed
> 2 && !promoted_val
)
21210 force_loopy_epilogue
= true;
21213 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
21215 /* If main algorithm works on QImode, no epilogue is needed.
21216 For small sizes just don't align anything. */
21217 if (size_needed
== 1)
21218 desired_align
= align
;
21225 label
= gen_label_rtx ();
21226 emit_cmp_and_jump_insns (count_exp
,
21227 GEN_INT (epilogue_size_needed
),
21228 LTU
, 0, counter_mode (count_exp
), 1, label
);
21229 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
21230 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
21232 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
21235 if (dynamic_check
!= -1)
21237 rtx hot_label
= gen_label_rtx ();
21238 jump_around_label
= gen_label_rtx ();
21239 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
21240 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
21241 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
21242 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
21243 emit_jump (jump_around_label
);
21244 emit_label (hot_label
);
21247 /* Step 2: Alignment prologue. */
21249 /* Do the expensive promotion once we branched off the small blocks. */
21251 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
21252 desired_align
, align
);
21253 gcc_assert (desired_align
>= 1 && align
>= 1);
21255 if (desired_align
> align
)
21257 if (align_bytes
== 0)
21259 /* Except for the first move in epilogue, we no longer know
21260 constant offset in aliasing info. It don't seems to worth
21261 the pain to maintain it for the first move, so throw away
21263 dst
= change_address (dst
, BLKmode
, destreg
);
21264 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
21269 /* If we know how many bytes need to be stored before dst is
21270 sufficiently aligned, maintain aliasing info accurately. */
21271 dst
= expand_constant_setmem_prologue (dst
, destreg
, promoted_val
,
21272 desired_align
, align_bytes
);
21273 count_exp
= plus_constant (count_exp
, -align_bytes
);
21274 count
-= align_bytes
;
21276 if (need_zero_guard
21277 && (count
< (unsigned HOST_WIDE_INT
) size_needed
21278 || (align_bytes
== 0
21279 && count
< ((unsigned HOST_WIDE_INT
) size_needed
21280 + desired_align
- align
))))
21282 /* It is possible that we copied enough so the main loop will not
21284 gcc_assert (size_needed
> 1);
21285 if (label
== NULL_RTX
)
21286 label
= gen_label_rtx ();
21287 emit_cmp_and_jump_insns (count_exp
,
21288 GEN_INT (size_needed
),
21289 LTU
, 0, counter_mode (count_exp
), 1, label
);
21290 if (expected_size
== -1
21291 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
21292 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
21294 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
21297 if (label
&& size_needed
== 1)
21299 emit_label (label
);
21300 LABEL_NUSES (label
) = 1;
21302 promoted_val
= val_exp
;
21303 epilogue_size_needed
= 1;
21305 else if (label
== NULL_RTX
)
21306 epilogue_size_needed
= size_needed
;
21308 /* Step 3: Main loop. */
/* NOTE(review): the 'switch (alg)' header and 'break;' lines of the main
   loop dispatch below are also missing from this extraction.  */
21314 gcc_unreachable ();
21316 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
21317 count_exp
, QImode
, 1, expected_size
);
21320 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
21321 count_exp
, Pmode
, 1, expected_size
);
21323 case unrolled_loop
:
21324 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
21325 count_exp
, Pmode
, 4, expected_size
);
21327 case rep_prefix_8_byte
:
21328 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
21331 case rep_prefix_4_byte
:
21332 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
21335 case rep_prefix_1_byte
:
21336 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
21340 /* Adjust properly the offset of src and dest memory for aliasing. */
21341 if (CONST_INT_P (count_exp
))
21342 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
21343 (count
/ size_needed
) * size_needed
);
21345 dst
= change_address (dst
, BLKmode
, destreg
);
21347 /* Step 4: Epilogue to copy the remaining bytes. */
21351 /* When the main loop is done, COUNT_EXP might hold original count,
21352 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
21353 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
21354 bytes. Compensate if needed. */
21356 if (size_needed
< epilogue_size_needed
)
21359 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
21360 GEN_INT (size_needed
- 1), count_exp
, 1,
21362 if (tmp
!= count_exp
)
21363 emit_move_insn (count_exp
, tmp
);
21365 emit_label (label
);
21366 LABEL_NUSES (label
) = 1;
21369 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
21371 if (force_loopy_epilogue
)
21372 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
21373 epilogue_size_needed
);
21375 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
21376 epilogue_size_needed
);
21378 if (jump_around_label
)
21379 emit_label (jump_around_label
);
21383 /* Expand the appropriate insns for doing strlen if not just doing
21386 out = result, initialized with the start address
21387 align_rtx = alignment of the address.
21388 scratch = scratch register, initialized with the startaddress when
21389 not aligned, otherwise undefined
21391 This is just the body. It needs the initializations mentioned above and
21392 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): corrupted extraction -- statements are split across
   lines and some lines (the return type, declarations of align/mem/tmp/
   cmp, several braces, label arguments, and conditional headers) are
   missing.  Code left byte-for-byte untouched; restore from the pristine
   original before building.

   Purpose (from visible code): emit the body of the unrolled strlen.
   First a byte-at-a-time loop advances OUT until it is 4-byte aligned
   (with shortcuts when the incoming alignment is known), then the main
   loop reads SImode words and detects a zero byte with the classic
   (x + (-0x01010101)) & ~x & 0x80808080 trick, and finally OUT is fixed
   up (via the 0x8080 tests and a carry-based subtract) to point at the
   terminating NUL.  OUT ends as the NUL's address, memchr-style -- the
   caller subtracts the start address.  */
21395 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
21399 rtx align_2_label
= NULL_RTX
;
21400 rtx align_3_label
= NULL_RTX
;
21401 rtx align_4_label
= gen_label_rtx ();
21402 rtx end_0_label
= gen_label_rtx ();
21404 rtx tmpreg
= gen_reg_rtx (SImode
);
21405 rtx scratch
= gen_reg_rtx (SImode
);
21409 if (CONST_INT_P (align_rtx
))
21410 align
= INTVAL (align_rtx
);
21412 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21414 /* Is there a known alignment and is it less than 4? */
21417 rtx scratch1
= gen_reg_rtx (Pmode
);
21418 emit_move_insn (scratch1
, out
);
21419 /* Is there a known alignment and is it not 2? */
21422 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
21423 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
21425 /* Leave just the 3 lower bits. */
21426 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
21427 NULL_RTX
, 0, OPTAB_WIDEN
);
21429 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
21430 Pmode
, 1, align_4_label
);
21431 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
21432 Pmode
, 1, align_2_label
);
21433 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
21434 Pmode
, 1, align_3_label
);
21438 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21439 check if is aligned to 4 - byte. */
21441 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
21442 NULL_RTX
, 0, OPTAB_WIDEN
);
21444 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
21445 Pmode
, 1, align_4_label
);
21448 mem
= change_address (src
, QImode
, out
);
21450 /* Now compare the bytes. */
21452 /* Compare the first n unaligned byte on a byte per byte basis. */
21453 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
21454 QImode
, 1, end_0_label
);
21456 /* Increment the address. */
21457 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
21459 /* Not needed with an alignment of 2 */
21462 emit_label (align_2_label
);
21464 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
21467 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
21469 emit_label (align_3_label
);
21472 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
21475 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
21478 /* Generate loop to check 4 bytes at a time. It is not a good idea to
21479 align this loop. It gives only huge programs, but does not help to
21481 emit_label (align_4_label
);
21483 mem
= change_address (src
, SImode
, out
);
21484 emit_move_insn (scratch
, mem
);
21485 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
21487 /* This formula yields a nonzero result iff one of the bytes is zero.
21488 This saves three branches inside loop and many cycles. */
21490 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
21491 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
21492 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
21493 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
21494 gen_int_mode (0x80808080, SImode
)));
21495 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
21500 rtx reg
= gen_reg_rtx (SImode
);
21501 rtx reg2
= gen_reg_rtx (Pmode
);
21502 emit_move_insn (reg
, tmpreg
);
21503 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
21505 /* If zero is not in the first two bytes, move two bytes forward. */
21506 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
21507 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
21508 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
21509 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
21510 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
21513 /* Emit lea manually to avoid clobbering of flags. */
21514 emit_insn (gen_rtx_SET (SImode
, reg2
,
21515 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
21517 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
21518 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
21519 emit_insn (gen_rtx_SET (VOIDmode
, out
,
21520 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
21526 rtx end_2_label
= gen_label_rtx ();
21527 /* Is zero in the first two bytes? */
21529 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
21530 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
21531 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
21532 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
21533 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
21535 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
21536 JUMP_LABEL (tmp
) = end_2_label
;
21538 /* Not in the first two. Move two bytes forward. */
21539 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
21540 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
21542 emit_label (end_2_label
);
21546 /* Avoid branch in fixing the byte. */
21547 tmpreg
= gen_lowpart (QImode
, tmpreg
);
21548 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
21549 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
21550 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
21551 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
21553 emit_label (end_0_label
);
21556 /* Expand strlen. */
/* NOTE(review): corrupted extraction -- statements are split across lines
   and some lines (return type, declaration of 'unspec', several braces,
   return statements) are missing.  Code left byte-for-byte untouched;
   restore from the pristine original before building.

   Purpose (from visible code): top-level strlen expander.  When
   TARGET_UNROLL_STRLEN applies (NUL terminator, optimize > 1, not
   optimizing for size), it calls ix86_expand_strlensi_unroll_1 and
   subtracts the start address from the returned NUL address.  Otherwise
   it emits the scas-based sequence (UNSPEC_SCAS / gen_strlenqi_1), which
   requires eax, ecx and edi to be available (see the fixed_regs check).  */
21559 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
21561 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
21563 /* The generic case of strlen expander is long. Avoid it's
21564 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
21566 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
21567 && !TARGET_INLINE_ALL_STRINGOPS
21568 && !optimize_insn_for_size_p ()
21569 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
21572 addr
= force_reg (Pmode
, XEXP (src
, 0));
21573 scratch1
= gen_reg_rtx (Pmode
);
21575 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
21576 && !optimize_insn_for_size_p ())
21578 /* Well it seems that some optimizer does not combine a call like
21579 foo(strlen(bar), strlen(bar));
21580 when the move and the subtraction is done here. It does calculate
21581 the length just once when these instructions are done inside of
21582 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
21583 often used and I use one fewer register for the lifetime of
21584 output_strlen_unroll() this is better. */
21586 emit_move_insn (out
, addr
);
21588 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
21590 /* strlensi_unroll_1 returns the address of the zero at the end of
21591 the string, like memchr(), so compute the length by subtracting
21592 the start address. */
21593 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
21599 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21600 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
21603 scratch2
= gen_reg_rtx (Pmode
);
21604 scratch3
= gen_reg_rtx (Pmode
);
21605 scratch4
= force_reg (Pmode
, constm1_rtx
);
21607 emit_move_insn (scratch3
, addr
);
21608 eoschar
= force_reg (QImode
, eoschar
);
21610 src
= replace_equiv_address_nv (src
, scratch3
);
21612 /* If .md starts supporting :P, this can be done in .md. */
21613 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
21614 scratch4
), UNSPEC_SCAS
);
21615 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
21616 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
21617 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
21622 /* For given symbol (function) construct code to compute address of it's PLT
21623 entry in large x86-64 PIC model. */
/* NOTE(review): corrupted extraction -- the return type, opening/closing
   braces and the trailing 'return tmp;' are missing.  Code left
   byte-for-byte untouched; restore from the pristine original.

   Purpose (from visible code): for the large x86-64 PIC code model
   (asserted), compute the address of SYMBOL's PLT entry into a fresh
   register: load CONST (UNSPEC_PLTOFF symbol) and add the PIC register.  */
21625 construct_plt_address (rtx symbol
)
21627 rtx tmp
= gen_reg_rtx (Pmode
);
21628 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
21630 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
21631 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
21633 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
21634 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
/* NOTE(review): corrupted extraction -- statements are split across lines
   and some lines (return type, the 'callarg2' parameter in the signature,
   declarations of avx256/i/unspec, several condition headers and braces)
   are missing.  Code left byte-for-byte untouched; restore from the
   pristine original before building.

   Purpose (from visible code): build and emit the CALL insn.  Handles:
   Mach-O indirect call targets; marking the PIC register as used for
   non-local symbols; the x86-64 varargs %al count (callarg2); large-PIC
   PLT addresses via construct_plt_address; forcing non-operand addresses
   into a register; attaching a stack-pop SET in a PARALLEL; the MS->SysV
   ABI transition clobbers (XMM6-15, esi, edi plus UNSPEC_MS_TO_SYSV_CALL);
   and, under TARGET_VZEROUPPER, either emitting vzeroupper directly
   (after reload) or wrapping the call with UNSPEC_CALL_NEEDS_VZEROUPPER
   so it is split later.  Returns the emitted call insn -- presumably;
   the return statement is among the missing lines.  */
21639 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
21641 rtx pop
, int sibcall
)
21643 rtx use
= NULL
, call
;
21645 if (pop
== const0_rtx
)
21647 gcc_assert (!TARGET_64BIT
|| !pop
);
21649 if (TARGET_MACHO
&& !TARGET_64BIT
)
21652 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
21653 fnaddr
= machopic_indirect_call_target (fnaddr
);
21658 /* Static functions and indirect calls don't need the pic register. */
21659 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
21660 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
21661 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
21662 use_reg (&use
, pic_offset_table_rtx
);
21665 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
21667 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
21668 emit_move_insn (al
, callarg2
);
21669 use_reg (&use
, al
);
21672 if (ix86_cmodel
== CM_LARGE_PIC
21674 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
21675 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
21676 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
21678 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), Pmode
)
21679 : !call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
21681 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
21682 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
21685 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
21687 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
21690 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
21691 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
21692 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
21695 && ix86_cfun_abi () == MS_ABI
21696 && (!callarg2
|| INTVAL (callarg2
) != -2))
21698 /* We need to represent that SI and DI registers are clobbered
21700 static int clobbered_registers
[] = {
21701 XMM6_REG
, XMM7_REG
, XMM8_REG
,
21702 XMM9_REG
, XMM10_REG
, XMM11_REG
,
21703 XMM12_REG
, XMM13_REG
, XMM14_REG
,
21704 XMM15_REG
, SI_REG
, DI_REG
21707 rtx vec
[ARRAY_SIZE (clobbered_registers
) + 2];
21708 rtx unspec
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
21709 UNSPEC_MS_TO_SYSV_CALL
);
21713 for (i
= 0; i
< ARRAY_SIZE (clobbered_registers
); i
++)
21714 vec
[i
+ 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers
[i
])
21717 (SSE_REGNO_P (clobbered_registers
[i
])
21719 clobbered_registers
[i
]));
21721 call
= gen_rtx_PARALLEL (VOIDmode
,
21722 gen_rtvec_v (ARRAY_SIZE (clobbered_registers
)
21726 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
21727 if (TARGET_VZEROUPPER
)
21732 if (cfun
->machine
->callee_pass_avx256_p
)
21734 if (cfun
->machine
->callee_return_avx256_p
)
21735 avx256
= callee_return_pass_avx256
;
21737 avx256
= callee_pass_avx256
;
21739 else if (cfun
->machine
->callee_return_avx256_p
)
21740 avx256
= callee_return_avx256
;
21742 avx256
= call_no_avx256
;
21744 if (reload_completed
)
21745 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256
)));
21748 unspec
= gen_rtx_UNSPEC (VOIDmode
,
21749 gen_rtvec (1, GEN_INT (avx256
)),
21750 UNSPEC_CALL_NEEDS_VZEROUPPER
);
21751 call
= gen_rtx_PARALLEL (VOIDmode
,
21752 gen_rtvec (2, call
, unspec
));
21756 call
= emit_call_insn (call
);
21758 CALL_INSN_FUNCTION_USAGE (call
) = use
;
/* NOTE(review): corrupted extraction -- the return type ('void',
   presumably) and braces are missing.  Code left byte-for-byte untouched.

   Purpose (from visible code): split a call decorated with
   UNSPEC_CALL_NEEDS_VZEROUPPER into an explicit vzeroupper insn followed
   by the bare call (element 0 of the PARALLEL pattern).  */
21764 ix86_split_call_vzeroupper (rtx insn
, rtx vzeroupper
)
21766 rtx call
= XVECEXP (PATTERN (insn
), 0, 0);
21767 emit_insn (gen_avx_vzeroupper (vzeroupper
));
21768 emit_call_insn (call
);
21771 /* Output the assembly for a call instruction. */
/* NOTE(review): corrupted extraction -- the return type (a const char *
   assembler template, from the returned strings), several condition
   headers ('if (direct_p)', TARGET_SEH guard, loop-body tests) and braces
   are missing.  Code left byte-for-byte untouched; restore from the
   pristine original before building.

   Purpose (from visible code): return the assembler template for a call
   or sibling-call insn.  Sibcalls become jmp (with REX.W for the SEH
   indirect case); regular calls may need a trailing nop under SEH
   unwinding when no real insn follows before the epilogue note, so the
   unwinder does not scan into the next function.  ADDR_OP selects
   operand %0 vs %1 in the template.  */
21774 ix86_output_call_insn (rtx insn
, rtx call_op
, int addr_op
)
21776 bool direct_p
= constant_call_address_operand (call_op
, Pmode
);
21777 bool seh_nop_p
= false;
21779 gcc_assert (addr_op
== 0 || addr_op
== 1);
21781 if (SIBLING_CALL_P (insn
))
21784 return addr_op
? "jmp\t%P1" : "jmp\t%P0";
21785 /* SEH epilogue detection requires the indirect branch case
21786 to include REX.W. */
21787 else if (TARGET_SEH
)
21788 return addr_op
? "rex.W jmp %A1" : "rex.W jmp %A0";
21790 return addr_op
? "jmp\t%A1" : "jmp\t%A0";
21793 /* SEH unwinding can require an extra nop to be emitted in several
21794 circumstances. Determine if we have one of those. */
21799 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
21801 /* If we get to another real insn, we don't need the nop. */
21805 /* If we get to the epilogue note, prevent a catch region from
21806 being adjacent to the standard epilogue sequence. If non-
21807 call-exceptions, we'll have done this during epilogue emission. */
21808 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
21809 && !flag_non_call_exceptions
21810 && !can_throw_internal (insn
))
21817 /* If we didn't find a real insn following the call, prevent the
21818 unwinder from looking into the next function. */
21826 return addr_op
? "call\t%P1\n\tnop" : "call\t%P0\n\tnop";
21828 return addr_op
? "call\t%P1" : "call\t%P0";
21833 return addr_op
? "call\t%A1\n\tnop" : "call\t%A0\n\tnop";
21835 return addr_op
? "call\t%A1" : "call\t%A0";
21839 /* Clear stack slot assignments remembered from previous functions.
21840 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* NOTE(review): corrupted extraction -- the braces and trailing
   'return f;' are missing.  Code left byte-for-byte untouched.

   Purpose (from visible code): INIT_EXPANDERS hook; allocate a
   zero-initialized per-function machine_function via GC and set the
   fields that need non-zero defaults (prologue/epilogue register count
   sentinel -1, TLS descriptor flag, the function's call ABI).  */
21843 static struct machine_function
*
21844 ix86_init_machine_status (void)
21846 struct machine_function
*f
;
21848 f
= ggc_alloc_cleared_machine_function ();
21849 f
->use_fast_prologue_epilogue_nregs
= -1;
21850 f
->tls_descriptor_call_expanded_p
= 0;
21851 f
->call_abi
= ix86_abi
;
21856 /* Return a MEM corresponding to a stack slot with mode MODE.
21857 Allocate a new slot if necessary.
21859 The RTL for a function can have several slots available: N is
21860 which slot to use. */
21863 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
21865 struct stack_local_entry
*s
;
21867 gcc_assert (n
< MAX_386_STACK_LOCALS
);
21869 /* Virtual slot is valid only before vregs are instantiated. */
21870 gcc_assert ((n
== SLOT_VIRTUAL
) == !virtuals_instantiated
);
/* Reuse an existing slot with the same (mode, n) key if one was
   already allocated for this function.  */
21872 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
21873 if (s
->mode
== mode
&& s
->n
== n
)
/* copy_rtx so each caller gets a fresh MEM it may modify.  */
21874 return copy_rtx (s
->rtl
);
/* No match: allocate a new GC'd entry and a real stack slot for it.  */
21876 s
= ggc_alloc_stack_local_entry ();
21879 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
/* Push the new entry on the front of the per-function list.  */
21881 s
->next
= ix86_stack_locals
;
21882 ix86_stack_locals
= s
;
21886 /* Construct the SYMBOL_REF for the tls_get_addr function. */
21888 static GTY(()) rtx ix86_tls_symbol
;
/* Lazily build (and cache in the GC root above) the SYMBOL_REF for the
   TLS resolver: "___tls_get_addr" under GNU TLS, "__tls_get_addr"
   otherwise.  */
21890 ix86_tls_get_addr (void)
21893 if (!ix86_tls_symbol
)
21895 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
21896 (TARGET_ANY_GNU_TLS
21898 ? "___tls_get_addr"
21899 : "__tls_get_addr");
21902 return ix86_tls_symbol
;
21905 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
21907 static GTY(()) rtx ix86_tls_module_base_symbol
;
/* Lazily build (and cache) the _TLS_MODULE_BASE_ SYMBOL_REF, marking it
   with the global-dynamic TLS model in its symbol flags.  */
21909 ix86_tls_module_base (void)
21912 if (!ix86_tls_module_base_symbol
)
21914 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
21915 "_TLS_MODULE_BASE_");
/* Record the TLS model in the symbol's flag bits.  */
21916 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
21917 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
21920 return ix86_tls_module_base_symbol
;
21923 /* Calculate the length of the memory address in the instruction
21924 encoding. Does not include the one-byte modrm, opcode, or prefix. */
21927 memory_address_length (rtx addr
)
21929 struct ix86_address parts
;
21930 rtx base
, index
, disp
;
21934 if (GET_CODE (addr
) == PRE_DEC
21935 || GET_CODE (addr
) == POST_INC
21936 || GET_CODE (addr
) == PRE_MODIFY
21937 || GET_CODE (addr
) == POST_MODIFY
)
21940 ok
= ix86_decompose_address (addr
, &parts
);
21943 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
21944 parts
.base
= SUBREG_REG (parts
.base
);
21945 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
21946 parts
.index
= SUBREG_REG (parts
.index
);
21949 index
= parts
.index
;
21954 - esp as the base always wants an index,
21955 - ebp as the base always wants a displacement,
21956 - r12 as the base always wants an index,
21957 - r13 as the base always wants a displacement. */
21959 /* Register Indirect. */
21960 if (base
&& !index
&& !disp
)
21962 /* esp (for its index) and ebp (for its displacement) need
21963 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
21966 && (addr
== arg_pointer_rtx
21967 || addr
== frame_pointer_rtx
21968 || REGNO (addr
) == SP_REG
21969 || REGNO (addr
) == BP_REG
21970 || REGNO (addr
) == R12_REG
21971 || REGNO (addr
) == R13_REG
))
21975 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
21976 is not disp32, but disp32(%rip), so for disp32
21977 SIB byte is needed, unless print_operand_address
21978 optimizes it into disp32(%rip) or (%rip) is implied
21980 else if (disp
&& !base
&& !index
)
21987 if (GET_CODE (disp
) == CONST
)
21988 symbol
= XEXP (disp
, 0);
21989 if (GET_CODE (symbol
) == PLUS
21990 && CONST_INT_P (XEXP (symbol
, 1)))
21991 symbol
= XEXP (symbol
, 0);
21993 if (GET_CODE (symbol
) != LABEL_REF
21994 && (GET_CODE (symbol
) != SYMBOL_REF
21995 || SYMBOL_REF_TLS_MODEL (symbol
) != 0)
21996 && (GET_CODE (symbol
) != UNSPEC
21997 || (XINT (symbol
, 1) != UNSPEC_GOTPCREL
21998 && XINT (symbol
, 1) != UNSPEC_PCREL
21999 && XINT (symbol
, 1) != UNSPEC_GOTNTPOFF
)))
22006 /* Find the length of the displacement constant. */
22009 if (base
&& satisfies_constraint_K (disp
))
22014 /* ebp always wants a displacement. Similarly r13. */
22015 else if (base
&& REG_P (base
)
22016 && (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
22019 /* An index requires the two-byte modrm form.... */
22021 /* ...like esp (or r12), which always wants an index. */
22022 || base
== arg_pointer_rtx
22023 || base
== frame_pointer_rtx
22024 || (base
&& REG_P (base
)
22025 && (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
22042 /* Compute default value for "length_immediate" attribute. When SHORTFORM
22043 is set, expect that insn have 8bit immediate alternative. */
22045 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
22049 extract_insn_cached (insn
);
22050 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
22051 if (CONSTANT_P (recog_data
.operand
[i
]))
22053 enum attr_mode mode
= get_attr_mode (insn
);
22056 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
22058 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
22065 ival
= trunc_int_for_mode (ival
, HImode
);
22068 ival
= trunc_int_for_mode (ival
, SImode
);
22073 if (IN_RANGE (ival
, -128, 127))
22090 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
22095 fatal_insn ("unknown insn mode", insn
);
22100 /* Compute default value for "length_address" attribute. */
22102 ix86_attr_length_address_default (rtx insn
)
22106 if (get_attr_type (insn
) == TYPE_LEA
)
22108 rtx set
= PATTERN (insn
), addr
;
22110 if (GET_CODE (set
) == PARALLEL
)
22111 set
= XVECEXP (set
, 0, 0);
22113 gcc_assert (GET_CODE (set
) == SET
);
22115 addr
= SET_SRC (set
);
22116 if (TARGET_64BIT
&& get_attr_mode (insn
) == MODE_SI
)
22118 if (GET_CODE (addr
) == ZERO_EXTEND
)
22119 addr
= XEXP (addr
, 0);
22120 if (GET_CODE (addr
) == SUBREG
)
22121 addr
= SUBREG_REG (addr
);
22124 return memory_address_length (addr
);
22127 extract_insn_cached (insn
);
22128 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
22129 if (MEM_P (recog_data
.operand
[i
]))
22131 constrain_operands_cached (reload_completed
);
22132 if (which_alternative
!= -1)
22134 const char *constraints
= recog_data
.constraints
[i
];
22135 int alt
= which_alternative
;
22137 while (*constraints
== '=' || *constraints
== '+')
22140 while (*constraints
++ != ',')
22142 /* Skip ignored operands. */
22143 if (*constraints
== 'X')
22146 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
22151 /* Compute default value for "length_vex" attribute. It includes
22152 2 or 3 byte VEX prefix and 1 opcode byte. */
22155 ix86_attr_length_vex_default (rtx insn
, int has_0f_opcode
,
22160 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
22161 byte VEX prefix. */
22162 if (!has_0f_opcode
|| has_vex_w
)
22165 /* We can always use 2 byte VEX prefix in 32bit. */
22169 extract_insn_cached (insn
);
22171 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
22172 if (REG_P (recog_data
.operand
[i
]))
22174 /* REX.W bit uses 3 byte VEX prefix. */
22175 if (GET_MODE (recog_data
.operand
[i
]) == DImode
22176 && GENERAL_REG_P (recog_data
.operand
[i
]))
22181 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22182 if (MEM_P (recog_data
.operand
[i
])
22183 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
22190 /* Return the maximum number of instructions a cpu can issue. */
22193 ix86_issue_rate (void)
22197 case PROCESSOR_PENTIUM
:
22198 case PROCESSOR_ATOM
:
22202 case PROCESSOR_PENTIUMPRO
:
22203 case PROCESSOR_PENTIUM4
:
22204 case PROCESSOR_CORE2_32
:
22205 case PROCESSOR_CORE2_64
:
22206 case PROCESSOR_COREI7_32
:
22207 case PROCESSOR_COREI7_64
:
22208 case PROCESSOR_ATHLON
:
22210 case PROCESSOR_AMDFAM10
:
22211 case PROCESSOR_NOCONA
:
22212 case PROCESSOR_GENERIC32
:
22213 case PROCESSOR_GENERIC64
:
22214 case PROCESSOR_BDVER1
:
22222 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
22223 by DEP_INSN and nothing set by DEP_INSN. */
22226 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
22230 /* Simplify the test for uninteresting insns. */
22231 if (insn_type
!= TYPE_SETCC
22232 && insn_type
!= TYPE_ICMOV
22233 && insn_type
!= TYPE_FCMOV
22234 && insn_type
!= TYPE_IBR
)
22237 if ((set
= single_set (dep_insn
)) != 0)
22239 set
= SET_DEST (set
);
22242 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
22243 && XVECLEN (PATTERN (dep_insn
), 0) == 2
22244 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
22245 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
22247 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
22248 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
22253 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
22256 /* This test is true if the dependent insn reads the flags but
22257 not any other potentially set register. */
22258 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
22261 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
22267 /* Return true iff USE_INSN has a memory address with operands set by
/* ... SET_INSN (address-generation interlock test).  Scans USE_INSN's
   operands for the first MEM and checks whether its address is modified
   by SET_INSN.  NOTE(review): the function header/locals and the final
   return for the no-MEM case are not visible in this extraction.  */
22271 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
22274 extract_insn_cached (use_insn
);
22275 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
22276 if (MEM_P (recog_data
.operand
[i
]))
22278 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
/* Dependent iff SET_INSN writes something the address reads.  */
22279 return modified_in_p (addr
, set_insn
) != 0;
22285 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
22287 enum attr_type insn_type
, dep_insn_type
;
22288 enum attr_memory memory
;
22290 int dep_insn_code_number
;
22292 /* Anti and output dependencies have zero cost on all CPUs. */
22293 if (REG_NOTE_KIND (link
) != 0)
22296 dep_insn_code_number
= recog_memoized (dep_insn
);
22298 /* If we can't recognize the insns, we can't really do anything. */
22299 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
22302 insn_type
= get_attr_type (insn
);
22303 dep_insn_type
= get_attr_type (dep_insn
);
22307 case PROCESSOR_PENTIUM
:
22308 /* Address Generation Interlock adds a cycle of latency. */
22309 if (insn_type
== TYPE_LEA
)
22311 rtx addr
= PATTERN (insn
);
22313 if (GET_CODE (addr
) == PARALLEL
)
22314 addr
= XVECEXP (addr
, 0, 0);
22316 gcc_assert (GET_CODE (addr
) == SET
);
22318 addr
= SET_SRC (addr
);
22319 if (modified_in_p (addr
, dep_insn
))
22322 else if (ix86_agi_dependent (dep_insn
, insn
))
22325 /* ??? Compares pair with jump/setcc. */
22326 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
22329 /* Floating point stores require value to be ready one cycle earlier. */
22330 if (insn_type
== TYPE_FMOV
22331 && get_attr_memory (insn
) == MEMORY_STORE
22332 && !ix86_agi_dependent (dep_insn
, insn
))
22336 case PROCESSOR_PENTIUMPRO
:
22337 memory
= get_attr_memory (insn
);
22339 /* INT->FP conversion is expensive. */
22340 if (get_attr_fp_int_src (dep_insn
))
22343 /* There is one cycle extra latency between an FP op and a store. */
22344 if (insn_type
== TYPE_FMOV
22345 && (set
= single_set (dep_insn
)) != NULL_RTX
22346 && (set2
= single_set (insn
)) != NULL_RTX
22347 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
22348 && MEM_P (SET_DEST (set2
)))
22351 /* Show ability of reorder buffer to hide latency of load by executing
22352 in parallel with previous instruction in case
22353 previous instruction is not needed to compute the address. */
22354 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
22355 && !ix86_agi_dependent (dep_insn
, insn
))
22357 /* Claim moves to take one cycle, as core can issue one load
22358 at time and the next load can start cycle later. */
22359 if (dep_insn_type
== TYPE_IMOV
22360 || dep_insn_type
== TYPE_FMOV
)
22368 memory
= get_attr_memory (insn
);
22370 /* The esp dependency is resolved before the instruction is really
22372 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
22373 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
22376 /* INT->FP conversion is expensive. */
22377 if (get_attr_fp_int_src (dep_insn
))
22380 /* Show ability of reorder buffer to hide latency of load by executing
22381 in parallel with previous instruction in case
22382 previous instruction is not needed to compute the address. */
22383 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
22384 && !ix86_agi_dependent (dep_insn
, insn
))
22386 /* Claim moves to take one cycle, as core can issue one load
22387 at time and the next load can start cycle later. */
22388 if (dep_insn_type
== TYPE_IMOV
22389 || dep_insn_type
== TYPE_FMOV
)
22398 case PROCESSOR_ATHLON
:
22400 case PROCESSOR_AMDFAM10
:
22401 case PROCESSOR_BDVER1
:
22402 case PROCESSOR_ATOM
:
22403 case PROCESSOR_GENERIC32
:
22404 case PROCESSOR_GENERIC64
:
22405 memory
= get_attr_memory (insn
);
22407 /* Show ability of reorder buffer to hide latency of load by executing
22408 in parallel with previous instruction in case
22409 previous instruction is not needed to compute the address. */
22410 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
22411 && !ix86_agi_dependent (dep_insn
, insn
))
22413 enum attr_unit unit
= get_attr_unit (insn
);
22416 /* Because of the difference between the length of integer and
22417 floating unit pipeline preparation stages, the memory operands
22418 for floating point are cheaper.
22420 ??? For Athlon it the difference is most probably 2. */
22421 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
22424 loadcost
= TARGET_ATHLON
? 2 : 0;
22426 if (cost
>= loadcost
)
22439 /* How many alternative schedules to try. This should be as wide as the
22440 scheduling freedom in the DFA, but no wider. Making this value too
22441 large results extra work for the scheduler. */
/* Per-CPU lookahead depth for haifa-sched's multipass scheduling.
   NOTE(review): the switch header and the per-case return values for
   Pentium/PentiumPro and the default are not visible in this
   extraction.  */
22444 ia32_multipass_dfa_lookahead (void)
22448 case PROCESSOR_PENTIUM
:
22451 case PROCESSOR_PENTIUMPRO
:
22455 case PROCESSOR_CORE2_32
:
22456 case PROCESSOR_CORE2_64
:
22457 case PROCESSOR_COREI7_32
:
22458 case PROCESSOR_COREI7_64
:
22459 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22460 as many instructions can be executed on a cycle, i.e.,
22461 issue_rate. I wonder why tuning for many CPUs does not do this. */
22462 return ix86_issue_rate ();
22471 /* Model decoder of Core 2/i7.
22472 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
22473 track the instruction fetch block boundaries and make sure that long
22474 (9+ bytes) instructions are assigned to D0. */
22476 /* Maximum length of an insn that can be handled by
22477 a secondary decoder unit. '8' for Core 2/i7. */
22478 static int core2i7_secondary_decoder_max_insn_size
;
22480 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22481 '16' for Core 2/i7. */
22482 static int core2i7_ifetch_block_size
;
22484 /* Maximum number of instructions decoder can handle per cycle.
22485 '6' for Core 2/i7. */
22486 static int core2i7_ifetch_block_max_insns
;
22488 typedef struct ix86_first_cycle_multipass_data_
*
22489 ix86_first_cycle_multipass_data_t
;
22490 typedef const struct ix86_first_cycle_multipass_data_
*
22491 const_ix86_first_cycle_multipass_data_t
;
22493 /* A variable to store target state across calls to max_issue within
22495 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
22496 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
22498 /* Initialize DATA. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_INIT hook: reset the decoder-model
   state (_DATA is an ix86_first_cycle_multipass_data_) — empty ifetch
   block, no insns decoded, no ready_try change bitmap yet.  */
22500 core2i7_first_cycle_multipass_init (void *_data
)
22502 ix86_first_cycle_multipass_data_t data
22503 = (ix86_first_cycle_multipass_data_t
) _data
;
22505 data
->ifetch_block_len
= 0;
22506 data
->ifetch_block_n_insns
= 0;
22507 data
/* Bitmap allocated lazily in core2i7_first_cycle_multipass_issue.  */
->ready_try_change
= NULL
;
22508 data
->ready_try_change_size
= 0;
22511 /* Advancing the cycle; reset ifetch block counts. */
/* TARGET_SCHED_DFA_POST_ADVANCE_CYCLE hook: a new cycle means a fresh
   instruction-fetch block, so zero the byte and insn counters after
   checking the previous cycle never exceeded the decoder limit.  */
22513 core2i7_dfa_post_advance_cycle (void)
22515 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
22517 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
22519 data
->ifetch_block_len
= 0;
22520 data
->ifetch_block_n_insns
= 0;
22523 static int min_insn_size (rtx
);
22525 /* Filter out insns from ready_try that the core will not be able to issue
22526 on current cycle due to decoder. */
22528 core2i7_first_cycle_multipass_filter_ready_try
22529 (const_ix86_first_cycle_multipass_data_t data
,
22530 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
22537 if (ready_try
[n_ready
])
22540 insn
= get_ready_element (n_ready
);
22541 insn_size
= min_insn_size (insn
);
22543 if (/* If this is a too long an insn for a secondary decoder ... */
22544 (!first_cycle_insn_p
22545 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
22546 /* ... or it would not fit into the ifetch block ... */
22547 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
22548 /* ... or the decoder is full already ... */
22549 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
22550 /* ... mask the insn out. */
22552 ready_try
[n_ready
] = 1;
22554 if (data
->ready_try_change
)
22555 SET_BIT (data
->ready_try_change
, n_ready
);
22560 /* Prepare for a new round of multipass lookahead scheduling. */
22562 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
22563 bool first_cycle_insn_p
)
22565 ix86_first_cycle_multipass_data_t data
22566 = (ix86_first_cycle_multipass_data_t
) _data
;
22567 const_ix86_first_cycle_multipass_data_t prev_data
22568 = ix86_first_cycle_multipass_data
;
22570 /* Restore the state from the end of the previous round. */
22571 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
22572 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
22574 /* Filter instructions that cannot be issued on current cycle due to
22575 decoder restrictions. */
22576 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
22577 first_cycle_insn_p
);
22580 /* INSN is being issued in current solution. Account for its impact on
22581 the decoder model. */
22583 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
22584 rtx insn
, const void *_prev_data
)
22586 ix86_first_cycle_multipass_data_t data
22587 = (ix86_first_cycle_multipass_data_t
) _data
;
22588 const_ix86_first_cycle_multipass_data_t prev_data
22589 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
22591 int insn_size
= min_insn_size (insn
);
22593 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
22594 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
22595 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
22596 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
22598 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22599 if (!data
->ready_try_change
)
22601 data
->ready_try_change
= sbitmap_alloc (n_ready
);
22602 data
->ready_try_change_size
= n_ready
;
22604 else if (data
->ready_try_change_size
< n_ready
)
22606 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
22608 data
->ready_try_change_size
= n_ready
;
22610 sbitmap_zero (data
->ready_try_change
);
22612 /* Filter out insns from ready_try that the core will not be able to issue
22613 on current cycle due to decoder. */
22614 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
22618 /* Revert the effect on ready_try. */
22620 core2i7_first_cycle_multipass_backtrack (const void *_data
,
22622 int n_ready ATTRIBUTE_UNUSED
)
22624 const_ix86_first_cycle_multipass_data_t data
22625 = (const_ix86_first_cycle_multipass_data_t
) _data
;
22626 unsigned int i
= 0;
22627 sbitmap_iterator sbi
;
22629 gcc_assert (sbitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
22630 EXECUTE_IF_SET_IN_SBITMAP (data
->ready_try_change
, 0, i
, sbi
)
22636 /* Save the result of multipass lookahead scheduling for the next round. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_END hook: copy the ifetch-block
   counters from the finished round's _DATA into the persistent
   ix86_first_cycle_multipass_data so the next round starts from them.  */
22638 core2i7_first_cycle_multipass_end (const void *_data
)
22640 const_ix86_first_cycle_multipass_data_t data
22641 = (const_ix86_first_cycle_multipass_data_t
) _data
;
22642 ix86_first_cycle_multipass_data_t next_data
22643 = ix86_first_cycle_multipass_data
;
22647 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
22648 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
22652 /* Deallocate target data. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_FINI hook: free the lazily
   allocated ready_try change bitmap and reset its bookkeeping so the
   state can be reused.  */
22654 core2i7_first_cycle_multipass_fini (void *_data
)
22656 ix86_first_cycle_multipass_data_t data
22657 = (ix86_first_cycle_multipass_data_t
) _data
;
22659 if (data
->ready_try_change
)
22661 sbitmap_free (data
->ready_try_change
);
22662 data
/* Null the pointer after freeing to avoid a dangling reference.  */
->ready_try_change
= NULL
;
22663 data
->ready_try_change_size
= 0;
22667 /* Prepare for scheduling pass. */
22669 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
22670 int verbose ATTRIBUTE_UNUSED
,
22671 int max_uid ATTRIBUTE_UNUSED
)
22673 /* Install scheduling hooks for current CPU. Some of these hooks are used
22674 in time-critical parts of the scheduler, so we only set them up when
22675 they are actually used. */
22678 case PROCESSOR_CORE2_32
:
22679 case PROCESSOR_CORE2_64
:
22680 case PROCESSOR_COREI7_32
:
22681 case PROCESSOR_COREI7_64
:
22682 targetm
.sched
.dfa_post_advance_cycle
22683 = core2i7_dfa_post_advance_cycle
;
22684 targetm
.sched
.first_cycle_multipass_init
22685 = core2i7_first_cycle_multipass_init
;
22686 targetm
.sched
.first_cycle_multipass_begin
22687 = core2i7_first_cycle_multipass_begin
;
22688 targetm
.sched
.first_cycle_multipass_issue
22689 = core2i7_first_cycle_multipass_issue
;
22690 targetm
.sched
.first_cycle_multipass_backtrack
22691 = core2i7_first_cycle_multipass_backtrack
;
22692 targetm
.sched
.first_cycle_multipass_end
22693 = core2i7_first_cycle_multipass_end
;
22694 targetm
.sched
.first_cycle_multipass_fini
22695 = core2i7_first_cycle_multipass_fini
;
22697 /* Set decoder parameters. */
22698 core2i7_secondary_decoder_max_insn_size
= 8;
22699 core2i7_ifetch_block_size
= 16;
22700 core2i7_ifetch_block_max_insns
= 6;
22704 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
22705 targetm
.sched
.first_cycle_multipass_init
= NULL
;
22706 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
22707 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
22708 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
22709 targetm
.sched
.first_cycle_multipass_end
= NULL
;
22710 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
22716 /* Compute the alignment given to a constant that is being placed in memory.
22717 EXP is the constant and ALIGN is the alignment that the object would
22719 The value of this function is used instead of that alignment to align
/* ... the constant (CONSTANT_ALIGNMENT target macro).  NOTE(review):
   the return values inside the numeric-constant branches and the final
   return of ALIGN are not visible in this extraction.  */
22723 ix86_constant_alignment (tree exp
, int align
)
/* Numeric constants: bump doubles to 64-bit and 128-bit-mode values
   to 128-bit alignment when the incoming alignment is smaller.  */
22725 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
22726 || TREE_CODE (exp
) == INTEGER_CST
)
22728 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
22730 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
/* Long strings get word alignment when not optimizing for size.  */
22733 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
22734 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
22735 return BITS_PER_WORD
;
22740 /* Compute the alignment for a static variable.
22741 TYPE is the data type, and ALIGN is the alignment that
22742 the object would ordinarily have. The value of this function is used
22743 instead of that alignment to align the object. */
22746 ix86_data_alignment (tree type
, int align
)
22748 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
22750 if (AGGREGATE_TYPE_P (type
)
22751 && TYPE_SIZE (type
)
22752 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
22753 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
22754 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
22755 && align
< max_align
)
22758 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
22759 to 16byte boundary. */
22762 if (AGGREGATE_TYPE_P (type
)
22763 && TYPE_SIZE (type
)
22764 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
22765 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
22766 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
22770 if (TREE_CODE (type
) == ARRAY_TYPE
)
22772 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
22774 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
22777 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
22780 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
22782 if ((TYPE_MODE (type
) == XCmode
22783 || TYPE_MODE (type
) == TCmode
) && align
< 128)
22786 else if ((TREE_CODE (type
) == RECORD_TYPE
22787 || TREE_CODE (type
) == UNION_TYPE
22788 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
22789 && TYPE_FIELDS (type
))
22791 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
22793 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
22796 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
22797 || TREE_CODE (type
) == INTEGER_TYPE
)
22799 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
22801 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
22808 /* Compute the alignment for a local variable or a stack slot. EXP is
22809 the data type or decl itself, MODE is the widest mode available and
22810 ALIGN is the alignment that the object would ordinarily have. The
22811 value of this macro is used instead of that alignment to align the
22815 ix86_local_alignment (tree exp
, enum machine_mode mode
,
22816 unsigned int align
)
22820 if (exp
&& DECL_P (exp
))
22822 type
= TREE_TYPE (exp
);
22831 /* Don't do dynamic stack realignment for long long objects with
22832 -mpreferred-stack-boundary=2. */
22835 && ix86_preferred_stack_boundary
< 64
22836 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
22837 && (!type
|| !TYPE_USER_ALIGN (type
))
22838 && (!decl
|| !DECL_USER_ALIGN (decl
)))
22841 /* If TYPE is NULL, we are allocating a stack slot for caller-save
22842 register in MODE. We will return the largest alignment of XF
22846 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
22847 align
= GET_MODE_ALIGNMENT (DFmode
);
22851 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
22852 to 16byte boundary. Exact wording is:
22854 An array uses the same alignment as its elements, except that a local or
22855 global array variable of length at least 16 bytes or
22856 a C99 variable-length array variable always has alignment of at least 16 bytes.
22858 This was added to allow use of aligned SSE instructions at arrays. This
22859 rule is meant for static storage (where compiler can not do the analysis
22860 by itself). We follow it for automatic variables only when convenient.
22861 We fully control everything in the function compiled and functions from
22862 other unit can not rely on the alignment.
22864 Exclude va_list type. It is the common case of local array where
22865 we can not benefit from the alignment. */
22866 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
22869 if (AGGREGATE_TYPE_P (type
)
22870 && (TYPE_MAIN_VARIANT (type
)
22871 != TYPE_MAIN_VARIANT (va_list_type_node
))
22872 && TYPE_SIZE (type
)
22873 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
22874 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
22875 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
22878 if (TREE_CODE (type
) == ARRAY_TYPE
)
22880 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
22882 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
22885 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
22887 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
22889 if ((TYPE_MODE (type
) == XCmode
22890 || TYPE_MODE (type
) == TCmode
) && align
< 128)
22893 else if ((TREE_CODE (type
) == RECORD_TYPE
22894 || TREE_CODE (type
) == UNION_TYPE
22895 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
22896 && TYPE_FIELDS (type
))
22898 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
22900 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
22903 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
22904 || TREE_CODE (type
) == INTEGER_TYPE
)
22907 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
22909 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
22915 /* Compute the minimum required alignment for dynamic stack realignment
22916 purposes for a local variable, parameter or a stack slot. EXP is
22917 the data type or decl itself, MODE is its mode and ALIGN is the
22918 alignment that the object would ordinarily have. */
/* NOTE(review): the function's return statements and the `decl`
   initialization are not visible in this extraction.  */
22921 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
22922 unsigned int align
)
/* When EXP is a decl, its type drives the checks below.  */
22926 if (exp
&& DECL_P (exp
))
22928 type
= TREE_TYPE (exp
);
/* Only the 32-bit, align==64, low-preferred-boundary case needs the
   special handling below.  */
22937 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
22940 /* Don't do dynamic stack realignment for long long objects with
22941 -mpreferred-stack-boundary=2. */
22942 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
22943 && (!type
|| !TYPE_USER_ALIGN (type
))
22944 && (!decl
|| !DECL_USER_ALIGN (decl
)))
22950 /* Find a location for the static chain incoming to a nested function.
22951 This is a register, unless all free registers are used by arguments. */
22954 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
22958 if (!DECL_STATIC_CHAIN (fndecl
))
22963 /* We always use R10 in 64-bit mode. */
22969 /* By default in 32-bit mode we use ECX to pass the static chain. */
22972 fntype
= TREE_TYPE (fndecl
);
22973 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
22975 /* Fastcall functions use ecx/edx for arguments, which leaves
22976 us with EAX for the static chain. */
22979 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)))
22981 /* Thiscall functions use ecx for arguments, which leaves
22982 us with EAX for the static chain. */
22985 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
22987 /* For regparm 3, we have no free call-clobbered registers in
22988 which to store the static chain. In order to implement this,
22989 we have the trampoline push the static chain to the stack.
22990 However, we can't push a value below the return address when
22991 we call the nested function directly, so we have to use an
22992 alternate entry point. For this we use ESI, and have the
22993 alternate entry point push ESI, so that things appear the
22994 same once we're executing the nested function. */
22997 if (fndecl
== current_function_decl
)
22998 ix86_static_chain_on_stack
= true;
22999 return gen_frame_mem (SImode
,
23000 plus_constant (arg_pointer_rtx
, -8));
23006 return gen_rtx_REG (Pmode
, regno
);
23009 /* Emit RTL insns to initialize the variable parts of a trampoline.
23010 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23011 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23012 to be passed to the target function. */
23015 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
23019 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
23026 /* Depending on the static chain location, either load a register
23027 with a constant, or push the constant to the stack. All of the
23028 instructions are the same size. */
23029 chain
= ix86_static_chain (fndecl
, true);
23032 if (REGNO (chain
) == CX_REG
)
23034 else if (REGNO (chain
) == AX_REG
)
23037 gcc_unreachable ();
23042 mem
= adjust_address (m_tramp
, QImode
, 0);
23043 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
23045 mem
= adjust_address (m_tramp
, SImode
, 1);
23046 emit_move_insn (mem
, chain_value
);
23048 /* Compute offset from the end of the jmp to the target function.
23049 In the case in which the trampoline stores the static chain on
23050 the stack, we need to skip the first insn which pushes the
23051 (call-saved) register static chain; this push is 1 byte. */
23052 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
23053 plus_constant (XEXP (m_tramp
, 0),
23054 MEM_P (chain
) ? 9 : 10),
23055 NULL_RTX
, 1, OPTAB_DIRECT
);
23057 mem
= adjust_address (m_tramp
, QImode
, 5);
23058 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
23060 mem
= adjust_address (m_tramp
, SImode
, 6);
23061 emit_move_insn (mem
, disp
);
23067 /* Load the function address to r11. Try to load address using
23068 the shorter movl instead of movabs. We may want to support
23069 movq for kernel mode, but kernel does not use trampolines at
23071 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
23073 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
23075 mem
= adjust_address (m_tramp
, HImode
, offset
);
23076 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
23078 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
23079 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
23084 mem
= adjust_address (m_tramp
, HImode
, offset
);
23085 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
23087 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
23088 emit_move_insn (mem
, fnaddr
);
23092 /* Load static chain using movabs to r10. */
23093 mem
= adjust_address (m_tramp
, HImode
, offset
);
23094 emit_move_insn (mem
, gen_int_mode (0xba49, HImode
));
23096 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
23097 emit_move_insn (mem
, chain_value
);
23100 /* Jump to r11; the last (unused) byte is a nop, only there to
23101 pad the write out to a single 32-bit store. */
23102 mem
= adjust_address (m_tramp
, SImode
, offset
);
23103 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
23106 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
23109 #ifdef ENABLE_EXECUTE_STACK
23110 #ifdef CHECK_EXECUTE_STACK_ENABLED
23111 if (CHECK_EXECUTE_STACK_ENABLED
)
23113 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
23114 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
23118 /* The following file contains several enumerations and data structures
23119 built from the definitions in i386-builtin-types.def. */
23121 #include "i386-builtin-types.inc"
23123 /* Table for the ix86 builtin non-function types. */
23124 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
23126 /* Retrieve an element from the above table, building some of
23127 the types lazily. */
23130 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
23132 unsigned int index
;
23135 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
23137 type
= ix86_builtin_type_tab
[(int) tcode
];
23141 gcc_assert (tcode
> IX86_BT_LAST_PRIM
);
23142 if (tcode
<= IX86_BT_LAST_VECT
)
23144 enum machine_mode mode
;
23146 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
23147 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
23148 mode
= ix86_builtin_type_vect_mode
[index
];
23150 type
= build_vector_type_for_mode (itype
, mode
);
23156 index
= tcode
- IX86_BT_LAST_VECT
- 1;
23157 if (tcode
<= IX86_BT_LAST_PTR
)
23158 quals
= TYPE_UNQUALIFIED
;
23160 quals
= TYPE_QUAL_CONST
;
23162 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
23163 if (quals
!= TYPE_UNQUALIFIED
)
23164 itype
= build_qualified_type (itype
, quals
);
23166 type
= build_pointer_type (itype
);
23169 ix86_builtin_type_tab
[(int) tcode
] = type
;
23173 /* Table for the ix86 builtin function types. */
23174 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
23176 /* Retrieve an element from the above table, building some of
23177 the types lazily. */
23180 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
23184 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
23186 type
= ix86_builtin_func_type_tab
[(int) tcode
];
23190 if (tcode
<= IX86_BT_LAST_FUNC
)
23192 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
23193 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
23194 tree rtype
, atype
, args
= void_list_node
;
23197 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
23198 for (i
= after
- 1; i
> start
; --i
)
23200 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
23201 args
= tree_cons (NULL
, atype
, args
);
23204 type
= build_function_type (rtype
, args
);
23208 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
23209 enum ix86_builtin_func_type icode
;
23211 icode
= ix86_builtin_func_alias_base
[index
];
23212 type
= ix86_get_builtin_func_type (icode
);
23215 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
23220 /* Codes for all the SSE/MMX builtins. */
23223 IX86_BUILTIN_ADDPS
,
23224 IX86_BUILTIN_ADDSS
,
23225 IX86_BUILTIN_DIVPS
,
23226 IX86_BUILTIN_DIVSS
,
23227 IX86_BUILTIN_MULPS
,
23228 IX86_BUILTIN_MULSS
,
23229 IX86_BUILTIN_SUBPS
,
23230 IX86_BUILTIN_SUBSS
,
23232 IX86_BUILTIN_CMPEQPS
,
23233 IX86_BUILTIN_CMPLTPS
,
23234 IX86_BUILTIN_CMPLEPS
,
23235 IX86_BUILTIN_CMPGTPS
,
23236 IX86_BUILTIN_CMPGEPS
,
23237 IX86_BUILTIN_CMPNEQPS
,
23238 IX86_BUILTIN_CMPNLTPS
,
23239 IX86_BUILTIN_CMPNLEPS
,
23240 IX86_BUILTIN_CMPNGTPS
,
23241 IX86_BUILTIN_CMPNGEPS
,
23242 IX86_BUILTIN_CMPORDPS
,
23243 IX86_BUILTIN_CMPUNORDPS
,
23244 IX86_BUILTIN_CMPEQSS
,
23245 IX86_BUILTIN_CMPLTSS
,
23246 IX86_BUILTIN_CMPLESS
,
23247 IX86_BUILTIN_CMPNEQSS
,
23248 IX86_BUILTIN_CMPNLTSS
,
23249 IX86_BUILTIN_CMPNLESS
,
23250 IX86_BUILTIN_CMPNGTSS
,
23251 IX86_BUILTIN_CMPNGESS
,
23252 IX86_BUILTIN_CMPORDSS
,
23253 IX86_BUILTIN_CMPUNORDSS
,
23255 IX86_BUILTIN_COMIEQSS
,
23256 IX86_BUILTIN_COMILTSS
,
23257 IX86_BUILTIN_COMILESS
,
23258 IX86_BUILTIN_COMIGTSS
,
23259 IX86_BUILTIN_COMIGESS
,
23260 IX86_BUILTIN_COMINEQSS
,
23261 IX86_BUILTIN_UCOMIEQSS
,
23262 IX86_BUILTIN_UCOMILTSS
,
23263 IX86_BUILTIN_UCOMILESS
,
23264 IX86_BUILTIN_UCOMIGTSS
,
23265 IX86_BUILTIN_UCOMIGESS
,
23266 IX86_BUILTIN_UCOMINEQSS
,
23268 IX86_BUILTIN_CVTPI2PS
,
23269 IX86_BUILTIN_CVTPS2PI
,
23270 IX86_BUILTIN_CVTSI2SS
,
23271 IX86_BUILTIN_CVTSI642SS
,
23272 IX86_BUILTIN_CVTSS2SI
,
23273 IX86_BUILTIN_CVTSS2SI64
,
23274 IX86_BUILTIN_CVTTPS2PI
,
23275 IX86_BUILTIN_CVTTSS2SI
,
23276 IX86_BUILTIN_CVTTSS2SI64
,
23278 IX86_BUILTIN_MAXPS
,
23279 IX86_BUILTIN_MAXSS
,
23280 IX86_BUILTIN_MINPS
,
23281 IX86_BUILTIN_MINSS
,
23283 IX86_BUILTIN_LOADUPS
,
23284 IX86_BUILTIN_STOREUPS
,
23285 IX86_BUILTIN_MOVSS
,
23287 IX86_BUILTIN_MOVHLPS
,
23288 IX86_BUILTIN_MOVLHPS
,
23289 IX86_BUILTIN_LOADHPS
,
23290 IX86_BUILTIN_LOADLPS
,
23291 IX86_BUILTIN_STOREHPS
,
23292 IX86_BUILTIN_STORELPS
,
23294 IX86_BUILTIN_MASKMOVQ
,
23295 IX86_BUILTIN_MOVMSKPS
,
23296 IX86_BUILTIN_PMOVMSKB
,
23298 IX86_BUILTIN_MOVNTPS
,
23299 IX86_BUILTIN_MOVNTQ
,
23301 IX86_BUILTIN_LOADDQU
,
23302 IX86_BUILTIN_STOREDQU
,
23304 IX86_BUILTIN_PACKSSWB
,
23305 IX86_BUILTIN_PACKSSDW
,
23306 IX86_BUILTIN_PACKUSWB
,
23308 IX86_BUILTIN_PADDB
,
23309 IX86_BUILTIN_PADDW
,
23310 IX86_BUILTIN_PADDD
,
23311 IX86_BUILTIN_PADDQ
,
23312 IX86_BUILTIN_PADDSB
,
23313 IX86_BUILTIN_PADDSW
,
23314 IX86_BUILTIN_PADDUSB
,
23315 IX86_BUILTIN_PADDUSW
,
23316 IX86_BUILTIN_PSUBB
,
23317 IX86_BUILTIN_PSUBW
,
23318 IX86_BUILTIN_PSUBD
,
23319 IX86_BUILTIN_PSUBQ
,
23320 IX86_BUILTIN_PSUBSB
,
23321 IX86_BUILTIN_PSUBSW
,
23322 IX86_BUILTIN_PSUBUSB
,
23323 IX86_BUILTIN_PSUBUSW
,
23326 IX86_BUILTIN_PANDN
,
23330 IX86_BUILTIN_PAVGB
,
23331 IX86_BUILTIN_PAVGW
,
23333 IX86_BUILTIN_PCMPEQB
,
23334 IX86_BUILTIN_PCMPEQW
,
23335 IX86_BUILTIN_PCMPEQD
,
23336 IX86_BUILTIN_PCMPGTB
,
23337 IX86_BUILTIN_PCMPGTW
,
23338 IX86_BUILTIN_PCMPGTD
,
23340 IX86_BUILTIN_PMADDWD
,
23342 IX86_BUILTIN_PMAXSW
,
23343 IX86_BUILTIN_PMAXUB
,
23344 IX86_BUILTIN_PMINSW
,
23345 IX86_BUILTIN_PMINUB
,
23347 IX86_BUILTIN_PMULHUW
,
23348 IX86_BUILTIN_PMULHW
,
23349 IX86_BUILTIN_PMULLW
,
23351 IX86_BUILTIN_PSADBW
,
23352 IX86_BUILTIN_PSHUFW
,
23354 IX86_BUILTIN_PSLLW
,
23355 IX86_BUILTIN_PSLLD
,
23356 IX86_BUILTIN_PSLLQ
,
23357 IX86_BUILTIN_PSRAW
,
23358 IX86_BUILTIN_PSRAD
,
23359 IX86_BUILTIN_PSRLW
,
23360 IX86_BUILTIN_PSRLD
,
23361 IX86_BUILTIN_PSRLQ
,
23362 IX86_BUILTIN_PSLLWI
,
23363 IX86_BUILTIN_PSLLDI
,
23364 IX86_BUILTIN_PSLLQI
,
23365 IX86_BUILTIN_PSRAWI
,
23366 IX86_BUILTIN_PSRADI
,
23367 IX86_BUILTIN_PSRLWI
,
23368 IX86_BUILTIN_PSRLDI
,
23369 IX86_BUILTIN_PSRLQI
,
23371 IX86_BUILTIN_PUNPCKHBW
,
23372 IX86_BUILTIN_PUNPCKHWD
,
23373 IX86_BUILTIN_PUNPCKHDQ
,
23374 IX86_BUILTIN_PUNPCKLBW
,
23375 IX86_BUILTIN_PUNPCKLWD
,
23376 IX86_BUILTIN_PUNPCKLDQ
,
23378 IX86_BUILTIN_SHUFPS
,
23380 IX86_BUILTIN_RCPPS
,
23381 IX86_BUILTIN_RCPSS
,
23382 IX86_BUILTIN_RSQRTPS
,
23383 IX86_BUILTIN_RSQRTPS_NR
,
23384 IX86_BUILTIN_RSQRTSS
,
23385 IX86_BUILTIN_RSQRTF
,
23386 IX86_BUILTIN_SQRTPS
,
23387 IX86_BUILTIN_SQRTPS_NR
,
23388 IX86_BUILTIN_SQRTSS
,
23390 IX86_BUILTIN_UNPCKHPS
,
23391 IX86_BUILTIN_UNPCKLPS
,
23393 IX86_BUILTIN_ANDPS
,
23394 IX86_BUILTIN_ANDNPS
,
23396 IX86_BUILTIN_XORPS
,
23399 IX86_BUILTIN_LDMXCSR
,
23400 IX86_BUILTIN_STMXCSR
,
23401 IX86_BUILTIN_SFENCE
,
23403 /* 3DNow! Original */
23404 IX86_BUILTIN_FEMMS
,
23405 IX86_BUILTIN_PAVGUSB
,
23406 IX86_BUILTIN_PF2ID
,
23407 IX86_BUILTIN_PFACC
,
23408 IX86_BUILTIN_PFADD
,
23409 IX86_BUILTIN_PFCMPEQ
,
23410 IX86_BUILTIN_PFCMPGE
,
23411 IX86_BUILTIN_PFCMPGT
,
23412 IX86_BUILTIN_PFMAX
,
23413 IX86_BUILTIN_PFMIN
,
23414 IX86_BUILTIN_PFMUL
,
23415 IX86_BUILTIN_PFRCP
,
23416 IX86_BUILTIN_PFRCPIT1
,
23417 IX86_BUILTIN_PFRCPIT2
,
23418 IX86_BUILTIN_PFRSQIT1
,
23419 IX86_BUILTIN_PFRSQRT
,
23420 IX86_BUILTIN_PFSUB
,
23421 IX86_BUILTIN_PFSUBR
,
23422 IX86_BUILTIN_PI2FD
,
23423 IX86_BUILTIN_PMULHRW
,
23425 /* 3DNow! Athlon Extensions */
23426 IX86_BUILTIN_PF2IW
,
23427 IX86_BUILTIN_PFNACC
,
23428 IX86_BUILTIN_PFPNACC
,
23429 IX86_BUILTIN_PI2FW
,
23430 IX86_BUILTIN_PSWAPDSI
,
23431 IX86_BUILTIN_PSWAPDSF
,
23434 IX86_BUILTIN_ADDPD
,
23435 IX86_BUILTIN_ADDSD
,
23436 IX86_BUILTIN_DIVPD
,
23437 IX86_BUILTIN_DIVSD
,
23438 IX86_BUILTIN_MULPD
,
23439 IX86_BUILTIN_MULSD
,
23440 IX86_BUILTIN_SUBPD
,
23441 IX86_BUILTIN_SUBSD
,
23443 IX86_BUILTIN_CMPEQPD
,
23444 IX86_BUILTIN_CMPLTPD
,
23445 IX86_BUILTIN_CMPLEPD
,
23446 IX86_BUILTIN_CMPGTPD
,
23447 IX86_BUILTIN_CMPGEPD
,
23448 IX86_BUILTIN_CMPNEQPD
,
23449 IX86_BUILTIN_CMPNLTPD
,
23450 IX86_BUILTIN_CMPNLEPD
,
23451 IX86_BUILTIN_CMPNGTPD
,
23452 IX86_BUILTIN_CMPNGEPD
,
23453 IX86_BUILTIN_CMPORDPD
,
23454 IX86_BUILTIN_CMPUNORDPD
,
23455 IX86_BUILTIN_CMPEQSD
,
23456 IX86_BUILTIN_CMPLTSD
,
23457 IX86_BUILTIN_CMPLESD
,
23458 IX86_BUILTIN_CMPNEQSD
,
23459 IX86_BUILTIN_CMPNLTSD
,
23460 IX86_BUILTIN_CMPNLESD
,
23461 IX86_BUILTIN_CMPORDSD
,
23462 IX86_BUILTIN_CMPUNORDSD
,
23464 IX86_BUILTIN_COMIEQSD
,
23465 IX86_BUILTIN_COMILTSD
,
23466 IX86_BUILTIN_COMILESD
,
23467 IX86_BUILTIN_COMIGTSD
,
23468 IX86_BUILTIN_COMIGESD
,
23469 IX86_BUILTIN_COMINEQSD
,
23470 IX86_BUILTIN_UCOMIEQSD
,
23471 IX86_BUILTIN_UCOMILTSD
,
23472 IX86_BUILTIN_UCOMILESD
,
23473 IX86_BUILTIN_UCOMIGTSD
,
23474 IX86_BUILTIN_UCOMIGESD
,
23475 IX86_BUILTIN_UCOMINEQSD
,
23477 IX86_BUILTIN_MAXPD
,
23478 IX86_BUILTIN_MAXSD
,
23479 IX86_BUILTIN_MINPD
,
23480 IX86_BUILTIN_MINSD
,
23482 IX86_BUILTIN_ANDPD
,
23483 IX86_BUILTIN_ANDNPD
,
23485 IX86_BUILTIN_XORPD
,
23487 IX86_BUILTIN_SQRTPD
,
23488 IX86_BUILTIN_SQRTSD
,
23490 IX86_BUILTIN_UNPCKHPD
,
23491 IX86_BUILTIN_UNPCKLPD
,
23493 IX86_BUILTIN_SHUFPD
,
23495 IX86_BUILTIN_LOADUPD
,
23496 IX86_BUILTIN_STOREUPD
,
23497 IX86_BUILTIN_MOVSD
,
23499 IX86_BUILTIN_LOADHPD
,
23500 IX86_BUILTIN_LOADLPD
,
23502 IX86_BUILTIN_CVTDQ2PD
,
23503 IX86_BUILTIN_CVTDQ2PS
,
23505 IX86_BUILTIN_CVTPD2DQ
,
23506 IX86_BUILTIN_CVTPD2PI
,
23507 IX86_BUILTIN_CVTPD2PS
,
23508 IX86_BUILTIN_CVTTPD2DQ
,
23509 IX86_BUILTIN_CVTTPD2PI
,
23511 IX86_BUILTIN_CVTPI2PD
,
23512 IX86_BUILTIN_CVTSI2SD
,
23513 IX86_BUILTIN_CVTSI642SD
,
23515 IX86_BUILTIN_CVTSD2SI
,
23516 IX86_BUILTIN_CVTSD2SI64
,
23517 IX86_BUILTIN_CVTSD2SS
,
23518 IX86_BUILTIN_CVTSS2SD
,
23519 IX86_BUILTIN_CVTTSD2SI
,
23520 IX86_BUILTIN_CVTTSD2SI64
,
23522 IX86_BUILTIN_CVTPS2DQ
,
23523 IX86_BUILTIN_CVTPS2PD
,
23524 IX86_BUILTIN_CVTTPS2DQ
,
23526 IX86_BUILTIN_MOVNTI
,
23527 IX86_BUILTIN_MOVNTPD
,
23528 IX86_BUILTIN_MOVNTDQ
,
23530 IX86_BUILTIN_MOVQ128
,
23533 IX86_BUILTIN_MASKMOVDQU
,
23534 IX86_BUILTIN_MOVMSKPD
,
23535 IX86_BUILTIN_PMOVMSKB128
,
23537 IX86_BUILTIN_PACKSSWB128
,
23538 IX86_BUILTIN_PACKSSDW128
,
23539 IX86_BUILTIN_PACKUSWB128
,
23541 IX86_BUILTIN_PADDB128
,
23542 IX86_BUILTIN_PADDW128
,
23543 IX86_BUILTIN_PADDD128
,
23544 IX86_BUILTIN_PADDQ128
,
23545 IX86_BUILTIN_PADDSB128
,
23546 IX86_BUILTIN_PADDSW128
,
23547 IX86_BUILTIN_PADDUSB128
,
23548 IX86_BUILTIN_PADDUSW128
,
23549 IX86_BUILTIN_PSUBB128
,
23550 IX86_BUILTIN_PSUBW128
,
23551 IX86_BUILTIN_PSUBD128
,
23552 IX86_BUILTIN_PSUBQ128
,
23553 IX86_BUILTIN_PSUBSB128
,
23554 IX86_BUILTIN_PSUBSW128
,
23555 IX86_BUILTIN_PSUBUSB128
,
23556 IX86_BUILTIN_PSUBUSW128
,
23558 IX86_BUILTIN_PAND128
,
23559 IX86_BUILTIN_PANDN128
,
23560 IX86_BUILTIN_POR128
,
23561 IX86_BUILTIN_PXOR128
,
23563 IX86_BUILTIN_PAVGB128
,
23564 IX86_BUILTIN_PAVGW128
,
23566 IX86_BUILTIN_PCMPEQB128
,
23567 IX86_BUILTIN_PCMPEQW128
,
23568 IX86_BUILTIN_PCMPEQD128
,
23569 IX86_BUILTIN_PCMPGTB128
,
23570 IX86_BUILTIN_PCMPGTW128
,
23571 IX86_BUILTIN_PCMPGTD128
,
23573 IX86_BUILTIN_PMADDWD128
,
23575 IX86_BUILTIN_PMAXSW128
,
23576 IX86_BUILTIN_PMAXUB128
,
23577 IX86_BUILTIN_PMINSW128
,
23578 IX86_BUILTIN_PMINUB128
,
23580 IX86_BUILTIN_PMULUDQ
,
23581 IX86_BUILTIN_PMULUDQ128
,
23582 IX86_BUILTIN_PMULHUW128
,
23583 IX86_BUILTIN_PMULHW128
,
23584 IX86_BUILTIN_PMULLW128
,
23586 IX86_BUILTIN_PSADBW128
,
23587 IX86_BUILTIN_PSHUFHW
,
23588 IX86_BUILTIN_PSHUFLW
,
23589 IX86_BUILTIN_PSHUFD
,
23591 IX86_BUILTIN_PSLLDQI128
,
23592 IX86_BUILTIN_PSLLWI128
,
23593 IX86_BUILTIN_PSLLDI128
,
23594 IX86_BUILTIN_PSLLQI128
,
23595 IX86_BUILTIN_PSRAWI128
,
23596 IX86_BUILTIN_PSRADI128
,
23597 IX86_BUILTIN_PSRLDQI128
,
23598 IX86_BUILTIN_PSRLWI128
,
23599 IX86_BUILTIN_PSRLDI128
,
23600 IX86_BUILTIN_PSRLQI128
,
23602 IX86_BUILTIN_PSLLDQ128
,
23603 IX86_BUILTIN_PSLLW128
,
23604 IX86_BUILTIN_PSLLD128
,
23605 IX86_BUILTIN_PSLLQ128
,
23606 IX86_BUILTIN_PSRAW128
,
23607 IX86_BUILTIN_PSRAD128
,
23608 IX86_BUILTIN_PSRLW128
,
23609 IX86_BUILTIN_PSRLD128
,
23610 IX86_BUILTIN_PSRLQ128
,
23612 IX86_BUILTIN_PUNPCKHBW128
,
23613 IX86_BUILTIN_PUNPCKHWD128
,
23614 IX86_BUILTIN_PUNPCKHDQ128
,
23615 IX86_BUILTIN_PUNPCKHQDQ128
,
23616 IX86_BUILTIN_PUNPCKLBW128
,
23617 IX86_BUILTIN_PUNPCKLWD128
,
23618 IX86_BUILTIN_PUNPCKLDQ128
,
23619 IX86_BUILTIN_PUNPCKLQDQ128
,
23621 IX86_BUILTIN_CLFLUSH
,
23622 IX86_BUILTIN_MFENCE
,
23623 IX86_BUILTIN_LFENCE
,
23625 IX86_BUILTIN_BSRSI
,
23626 IX86_BUILTIN_BSRDI
,
23627 IX86_BUILTIN_RDPMC
,
23628 IX86_BUILTIN_RDTSC
,
23629 IX86_BUILTIN_RDTSCP
,
23630 IX86_BUILTIN_ROLQI
,
23631 IX86_BUILTIN_ROLHI
,
23632 IX86_BUILTIN_RORQI
,
23633 IX86_BUILTIN_RORHI
,
23636 IX86_BUILTIN_ADDSUBPS
,
23637 IX86_BUILTIN_HADDPS
,
23638 IX86_BUILTIN_HSUBPS
,
23639 IX86_BUILTIN_MOVSHDUP
,
23640 IX86_BUILTIN_MOVSLDUP
,
23641 IX86_BUILTIN_ADDSUBPD
,
23642 IX86_BUILTIN_HADDPD
,
23643 IX86_BUILTIN_HSUBPD
,
23644 IX86_BUILTIN_LDDQU
,
23646 IX86_BUILTIN_MONITOR
,
23647 IX86_BUILTIN_MWAIT
,
23650 IX86_BUILTIN_PHADDW
,
23651 IX86_BUILTIN_PHADDD
,
23652 IX86_BUILTIN_PHADDSW
,
23653 IX86_BUILTIN_PHSUBW
,
23654 IX86_BUILTIN_PHSUBD
,
23655 IX86_BUILTIN_PHSUBSW
,
23656 IX86_BUILTIN_PMADDUBSW
,
23657 IX86_BUILTIN_PMULHRSW
,
23658 IX86_BUILTIN_PSHUFB
,
23659 IX86_BUILTIN_PSIGNB
,
23660 IX86_BUILTIN_PSIGNW
,
23661 IX86_BUILTIN_PSIGND
,
23662 IX86_BUILTIN_PALIGNR
,
23663 IX86_BUILTIN_PABSB
,
23664 IX86_BUILTIN_PABSW
,
23665 IX86_BUILTIN_PABSD
,
23667 IX86_BUILTIN_PHADDW128
,
23668 IX86_BUILTIN_PHADDD128
,
23669 IX86_BUILTIN_PHADDSW128
,
23670 IX86_BUILTIN_PHSUBW128
,
23671 IX86_BUILTIN_PHSUBD128
,
23672 IX86_BUILTIN_PHSUBSW128
,
23673 IX86_BUILTIN_PMADDUBSW128
,
23674 IX86_BUILTIN_PMULHRSW128
,
23675 IX86_BUILTIN_PSHUFB128
,
23676 IX86_BUILTIN_PSIGNB128
,
23677 IX86_BUILTIN_PSIGNW128
,
23678 IX86_BUILTIN_PSIGND128
,
23679 IX86_BUILTIN_PALIGNR128
,
23680 IX86_BUILTIN_PABSB128
,
23681 IX86_BUILTIN_PABSW128
,
23682 IX86_BUILTIN_PABSD128
,
23684 /* AMDFAM10 - SSE4A New Instructions. */
23685 IX86_BUILTIN_MOVNTSD
,
23686 IX86_BUILTIN_MOVNTSS
,
23687 IX86_BUILTIN_EXTRQI
,
23688 IX86_BUILTIN_EXTRQ
,
23689 IX86_BUILTIN_INSERTQI
,
23690 IX86_BUILTIN_INSERTQ
,
23693 IX86_BUILTIN_BLENDPD
,
23694 IX86_BUILTIN_BLENDPS
,
23695 IX86_BUILTIN_BLENDVPD
,
23696 IX86_BUILTIN_BLENDVPS
,
23697 IX86_BUILTIN_PBLENDVB128
,
23698 IX86_BUILTIN_PBLENDW128
,
23703 IX86_BUILTIN_INSERTPS128
,
23705 IX86_BUILTIN_MOVNTDQA
,
23706 IX86_BUILTIN_MPSADBW128
,
23707 IX86_BUILTIN_PACKUSDW128
,
23708 IX86_BUILTIN_PCMPEQQ
,
23709 IX86_BUILTIN_PHMINPOSUW128
,
23711 IX86_BUILTIN_PMAXSB128
,
23712 IX86_BUILTIN_PMAXSD128
,
23713 IX86_BUILTIN_PMAXUD128
,
23714 IX86_BUILTIN_PMAXUW128
,
23716 IX86_BUILTIN_PMINSB128
,
23717 IX86_BUILTIN_PMINSD128
,
23718 IX86_BUILTIN_PMINUD128
,
23719 IX86_BUILTIN_PMINUW128
,
23721 IX86_BUILTIN_PMOVSXBW128
,
23722 IX86_BUILTIN_PMOVSXBD128
,
23723 IX86_BUILTIN_PMOVSXBQ128
,
23724 IX86_BUILTIN_PMOVSXWD128
,
23725 IX86_BUILTIN_PMOVSXWQ128
,
23726 IX86_BUILTIN_PMOVSXDQ128
,
23728 IX86_BUILTIN_PMOVZXBW128
,
23729 IX86_BUILTIN_PMOVZXBD128
,
23730 IX86_BUILTIN_PMOVZXBQ128
,
23731 IX86_BUILTIN_PMOVZXWD128
,
23732 IX86_BUILTIN_PMOVZXWQ128
,
23733 IX86_BUILTIN_PMOVZXDQ128
,
23735 IX86_BUILTIN_PMULDQ128
,
23736 IX86_BUILTIN_PMULLD128
,
23738 IX86_BUILTIN_ROUNDPD
,
23739 IX86_BUILTIN_ROUNDPS
,
23740 IX86_BUILTIN_ROUNDSD
,
23741 IX86_BUILTIN_ROUNDSS
,
23743 IX86_BUILTIN_PTESTZ
,
23744 IX86_BUILTIN_PTESTC
,
23745 IX86_BUILTIN_PTESTNZC
,
23747 IX86_BUILTIN_VEC_INIT_V2SI
,
23748 IX86_BUILTIN_VEC_INIT_V4HI
,
23749 IX86_BUILTIN_VEC_INIT_V8QI
,
23750 IX86_BUILTIN_VEC_EXT_V2DF
,
23751 IX86_BUILTIN_VEC_EXT_V2DI
,
23752 IX86_BUILTIN_VEC_EXT_V4SF
,
23753 IX86_BUILTIN_VEC_EXT_V4SI
,
23754 IX86_BUILTIN_VEC_EXT_V8HI
,
23755 IX86_BUILTIN_VEC_EXT_V2SI
,
23756 IX86_BUILTIN_VEC_EXT_V4HI
,
23757 IX86_BUILTIN_VEC_EXT_V16QI
,
23758 IX86_BUILTIN_VEC_SET_V2DI
,
23759 IX86_BUILTIN_VEC_SET_V4SF
,
23760 IX86_BUILTIN_VEC_SET_V4SI
,
23761 IX86_BUILTIN_VEC_SET_V8HI
,
23762 IX86_BUILTIN_VEC_SET_V4HI
,
23763 IX86_BUILTIN_VEC_SET_V16QI
,
23765 IX86_BUILTIN_VEC_PACK_SFIX
,
23768 IX86_BUILTIN_CRC32QI
,
23769 IX86_BUILTIN_CRC32HI
,
23770 IX86_BUILTIN_CRC32SI
,
23771 IX86_BUILTIN_CRC32DI
,
23773 IX86_BUILTIN_PCMPESTRI128
,
23774 IX86_BUILTIN_PCMPESTRM128
,
23775 IX86_BUILTIN_PCMPESTRA128
,
23776 IX86_BUILTIN_PCMPESTRC128
,
23777 IX86_BUILTIN_PCMPESTRO128
,
23778 IX86_BUILTIN_PCMPESTRS128
,
23779 IX86_BUILTIN_PCMPESTRZ128
,
23780 IX86_BUILTIN_PCMPISTRI128
,
23781 IX86_BUILTIN_PCMPISTRM128
,
23782 IX86_BUILTIN_PCMPISTRA128
,
23783 IX86_BUILTIN_PCMPISTRC128
,
23784 IX86_BUILTIN_PCMPISTRO128
,
23785 IX86_BUILTIN_PCMPISTRS128
,
23786 IX86_BUILTIN_PCMPISTRZ128
,
23788 IX86_BUILTIN_PCMPGTQ
,
23790 /* AES instructions */
23791 IX86_BUILTIN_AESENC128
,
23792 IX86_BUILTIN_AESENCLAST128
,
23793 IX86_BUILTIN_AESDEC128
,
23794 IX86_BUILTIN_AESDECLAST128
,
23795 IX86_BUILTIN_AESIMC128
,
23796 IX86_BUILTIN_AESKEYGENASSIST128
,
23798 /* PCLMUL instruction */
23799 IX86_BUILTIN_PCLMULQDQ128
,
23802 IX86_BUILTIN_ADDPD256
,
23803 IX86_BUILTIN_ADDPS256
,
23804 IX86_BUILTIN_ADDSUBPD256
,
23805 IX86_BUILTIN_ADDSUBPS256
,
23806 IX86_BUILTIN_ANDPD256
,
23807 IX86_BUILTIN_ANDPS256
,
23808 IX86_BUILTIN_ANDNPD256
,
23809 IX86_BUILTIN_ANDNPS256
,
23810 IX86_BUILTIN_BLENDPD256
,
23811 IX86_BUILTIN_BLENDPS256
,
23812 IX86_BUILTIN_BLENDVPD256
,
23813 IX86_BUILTIN_BLENDVPS256
,
23814 IX86_BUILTIN_DIVPD256
,
23815 IX86_BUILTIN_DIVPS256
,
23816 IX86_BUILTIN_DPPS256
,
23817 IX86_BUILTIN_HADDPD256
,
23818 IX86_BUILTIN_HADDPS256
,
23819 IX86_BUILTIN_HSUBPD256
,
23820 IX86_BUILTIN_HSUBPS256
,
23821 IX86_BUILTIN_MAXPD256
,
23822 IX86_BUILTIN_MAXPS256
,
23823 IX86_BUILTIN_MINPD256
,
23824 IX86_BUILTIN_MINPS256
,
23825 IX86_BUILTIN_MULPD256
,
23826 IX86_BUILTIN_MULPS256
,
23827 IX86_BUILTIN_ORPD256
,
23828 IX86_BUILTIN_ORPS256
,
23829 IX86_BUILTIN_SHUFPD256
,
23830 IX86_BUILTIN_SHUFPS256
,
23831 IX86_BUILTIN_SUBPD256
,
23832 IX86_BUILTIN_SUBPS256
,
23833 IX86_BUILTIN_XORPD256
,
23834 IX86_BUILTIN_XORPS256
,
23835 IX86_BUILTIN_CMPSD
,
23836 IX86_BUILTIN_CMPSS
,
23837 IX86_BUILTIN_CMPPD
,
23838 IX86_BUILTIN_CMPPS
,
23839 IX86_BUILTIN_CMPPD256
,
23840 IX86_BUILTIN_CMPPS256
,
23841 IX86_BUILTIN_CVTDQ2PD256
,
23842 IX86_BUILTIN_CVTDQ2PS256
,
23843 IX86_BUILTIN_CVTPD2PS256
,
23844 IX86_BUILTIN_CVTPS2DQ256
,
23845 IX86_BUILTIN_CVTPS2PD256
,
23846 IX86_BUILTIN_CVTTPD2DQ256
,
23847 IX86_BUILTIN_CVTPD2DQ256
,
23848 IX86_BUILTIN_CVTTPS2DQ256
,
23849 IX86_BUILTIN_EXTRACTF128PD256
,
23850 IX86_BUILTIN_EXTRACTF128PS256
,
23851 IX86_BUILTIN_EXTRACTF128SI256
,
23852 IX86_BUILTIN_VZEROALL
,
23853 IX86_BUILTIN_VZEROUPPER
,
23854 IX86_BUILTIN_VPERMILVARPD
,
23855 IX86_BUILTIN_VPERMILVARPS
,
23856 IX86_BUILTIN_VPERMILVARPD256
,
23857 IX86_BUILTIN_VPERMILVARPS256
,
23858 IX86_BUILTIN_VPERMILPD
,
23859 IX86_BUILTIN_VPERMILPS
,
23860 IX86_BUILTIN_VPERMILPD256
,
23861 IX86_BUILTIN_VPERMILPS256
,
23862 IX86_BUILTIN_VPERMIL2PD
,
23863 IX86_BUILTIN_VPERMIL2PS
,
23864 IX86_BUILTIN_VPERMIL2PD256
,
23865 IX86_BUILTIN_VPERMIL2PS256
,
23866 IX86_BUILTIN_VPERM2F128PD256
,
23867 IX86_BUILTIN_VPERM2F128PS256
,
23868 IX86_BUILTIN_VPERM2F128SI256
,
23869 IX86_BUILTIN_VBROADCASTSS
,
23870 IX86_BUILTIN_VBROADCASTSD256
,
23871 IX86_BUILTIN_VBROADCASTSS256
,
23872 IX86_BUILTIN_VBROADCASTPD256
,
23873 IX86_BUILTIN_VBROADCASTPS256
,
23874 IX86_BUILTIN_VINSERTF128PD256
,
23875 IX86_BUILTIN_VINSERTF128PS256
,
23876 IX86_BUILTIN_VINSERTF128SI256
,
23877 IX86_BUILTIN_LOADUPD256
,
23878 IX86_BUILTIN_LOADUPS256
,
23879 IX86_BUILTIN_STOREUPD256
,
23880 IX86_BUILTIN_STOREUPS256
,
23881 IX86_BUILTIN_LDDQU256
,
23882 IX86_BUILTIN_MOVNTDQ256
,
23883 IX86_BUILTIN_MOVNTPD256
,
23884 IX86_BUILTIN_MOVNTPS256
,
23885 IX86_BUILTIN_LOADDQU256
,
23886 IX86_BUILTIN_STOREDQU256
,
23887 IX86_BUILTIN_MASKLOADPD
,
23888 IX86_BUILTIN_MASKLOADPS
,
23889 IX86_BUILTIN_MASKSTOREPD
,
23890 IX86_BUILTIN_MASKSTOREPS
,
23891 IX86_BUILTIN_MASKLOADPD256
,
23892 IX86_BUILTIN_MASKLOADPS256
,
23893 IX86_BUILTIN_MASKSTOREPD256
,
23894 IX86_BUILTIN_MASKSTOREPS256
,
23895 IX86_BUILTIN_MOVSHDUP256
,
23896 IX86_BUILTIN_MOVSLDUP256
,
23897 IX86_BUILTIN_MOVDDUP256
,
23899 IX86_BUILTIN_SQRTPD256
,
23900 IX86_BUILTIN_SQRTPS256
,
23901 IX86_BUILTIN_SQRTPS_NR256
,
23902 IX86_BUILTIN_RSQRTPS256
,
23903 IX86_BUILTIN_RSQRTPS_NR256
,
23905 IX86_BUILTIN_RCPPS256
,
23907 IX86_BUILTIN_ROUNDPD256
,
23908 IX86_BUILTIN_ROUNDPS256
,
23910 IX86_BUILTIN_UNPCKHPD256
,
23911 IX86_BUILTIN_UNPCKLPD256
,
23912 IX86_BUILTIN_UNPCKHPS256
,
23913 IX86_BUILTIN_UNPCKLPS256
,
23915 IX86_BUILTIN_SI256_SI
,
23916 IX86_BUILTIN_PS256_PS
,
23917 IX86_BUILTIN_PD256_PD
,
23918 IX86_BUILTIN_SI_SI256
,
23919 IX86_BUILTIN_PS_PS256
,
23920 IX86_BUILTIN_PD_PD256
,
23922 IX86_BUILTIN_VTESTZPD
,
23923 IX86_BUILTIN_VTESTCPD
,
23924 IX86_BUILTIN_VTESTNZCPD
,
23925 IX86_BUILTIN_VTESTZPS
,
23926 IX86_BUILTIN_VTESTCPS
,
23927 IX86_BUILTIN_VTESTNZCPS
,
23928 IX86_BUILTIN_VTESTZPD256
,
23929 IX86_BUILTIN_VTESTCPD256
,
23930 IX86_BUILTIN_VTESTNZCPD256
,
23931 IX86_BUILTIN_VTESTZPS256
,
23932 IX86_BUILTIN_VTESTCPS256
,
23933 IX86_BUILTIN_VTESTNZCPS256
,
23934 IX86_BUILTIN_PTESTZ256
,
23935 IX86_BUILTIN_PTESTC256
,
23936 IX86_BUILTIN_PTESTNZC256
,
23938 IX86_BUILTIN_MOVMSKPD256
,
23939 IX86_BUILTIN_MOVMSKPS256
,
23941 /* TFmode support builtins. */
23943 IX86_BUILTIN_HUGE_VALQ
,
23944 IX86_BUILTIN_FABSQ
,
23945 IX86_BUILTIN_COPYSIGNQ
,
23947 /* Vectorizer support builtins. */
23948 IX86_BUILTIN_CPYSGNPS
,
23949 IX86_BUILTIN_CPYSGNPD
,
23950 IX86_BUILTIN_CPYSGNPS256
,
23951 IX86_BUILTIN_CPYSGNPD256
,
23953 IX86_BUILTIN_CVTUDQ2PS
,
23955 IX86_BUILTIN_VEC_PERM_V2DF
,
23956 IX86_BUILTIN_VEC_PERM_V4SF
,
23957 IX86_BUILTIN_VEC_PERM_V2DI
,
23958 IX86_BUILTIN_VEC_PERM_V4SI
,
23959 IX86_BUILTIN_VEC_PERM_V8HI
,
23960 IX86_BUILTIN_VEC_PERM_V16QI
,
23961 IX86_BUILTIN_VEC_PERM_V2DI_U
,
23962 IX86_BUILTIN_VEC_PERM_V4SI_U
,
23963 IX86_BUILTIN_VEC_PERM_V8HI_U
,
23964 IX86_BUILTIN_VEC_PERM_V16QI_U
,
23965 IX86_BUILTIN_VEC_PERM_V4DF
,
23966 IX86_BUILTIN_VEC_PERM_V8SF
,
23968 /* FMA4 and XOP instructions. */
23969 IX86_BUILTIN_VFMADDSS
,
23970 IX86_BUILTIN_VFMADDSD
,
23971 IX86_BUILTIN_VFMADDPS
,
23972 IX86_BUILTIN_VFMADDPD
,
23973 IX86_BUILTIN_VFMADDPS256
,
23974 IX86_BUILTIN_VFMADDPD256
,
23975 IX86_BUILTIN_VFMADDSUBPS
,
23976 IX86_BUILTIN_VFMADDSUBPD
,
23977 IX86_BUILTIN_VFMADDSUBPS256
,
23978 IX86_BUILTIN_VFMADDSUBPD256
,
23980 IX86_BUILTIN_VPCMOV
,
23981 IX86_BUILTIN_VPCMOV_V2DI
,
23982 IX86_BUILTIN_VPCMOV_V4SI
,
23983 IX86_BUILTIN_VPCMOV_V8HI
,
23984 IX86_BUILTIN_VPCMOV_V16QI
,
23985 IX86_BUILTIN_VPCMOV_V4SF
,
23986 IX86_BUILTIN_VPCMOV_V2DF
,
23987 IX86_BUILTIN_VPCMOV256
,
23988 IX86_BUILTIN_VPCMOV_V4DI256
,
23989 IX86_BUILTIN_VPCMOV_V8SI256
,
23990 IX86_BUILTIN_VPCMOV_V16HI256
,
23991 IX86_BUILTIN_VPCMOV_V32QI256
,
23992 IX86_BUILTIN_VPCMOV_V8SF256
,
23993 IX86_BUILTIN_VPCMOV_V4DF256
,
23995 IX86_BUILTIN_VPPERM
,
23997 IX86_BUILTIN_VPMACSSWW
,
23998 IX86_BUILTIN_VPMACSWW
,
23999 IX86_BUILTIN_VPMACSSWD
,
24000 IX86_BUILTIN_VPMACSWD
,
24001 IX86_BUILTIN_VPMACSSDD
,
24002 IX86_BUILTIN_VPMACSDD
,
24003 IX86_BUILTIN_VPMACSSDQL
,
24004 IX86_BUILTIN_VPMACSSDQH
,
24005 IX86_BUILTIN_VPMACSDQL
,
24006 IX86_BUILTIN_VPMACSDQH
,
24007 IX86_BUILTIN_VPMADCSSWD
,
24008 IX86_BUILTIN_VPMADCSWD
,
24010 IX86_BUILTIN_VPHADDBW
,
24011 IX86_BUILTIN_VPHADDBD
,
24012 IX86_BUILTIN_VPHADDBQ
,
24013 IX86_BUILTIN_VPHADDWD
,
24014 IX86_BUILTIN_VPHADDWQ
,
24015 IX86_BUILTIN_VPHADDDQ
,
24016 IX86_BUILTIN_VPHADDUBW
,
24017 IX86_BUILTIN_VPHADDUBD
,
24018 IX86_BUILTIN_VPHADDUBQ
,
24019 IX86_BUILTIN_VPHADDUWD
,
24020 IX86_BUILTIN_VPHADDUWQ
,
24021 IX86_BUILTIN_VPHADDUDQ
,
24022 IX86_BUILTIN_VPHSUBBW
,
24023 IX86_BUILTIN_VPHSUBWD
,
24024 IX86_BUILTIN_VPHSUBDQ
,
24026 IX86_BUILTIN_VPROTB
,
24027 IX86_BUILTIN_VPROTW
,
24028 IX86_BUILTIN_VPROTD
,
24029 IX86_BUILTIN_VPROTQ
,
24030 IX86_BUILTIN_VPROTB_IMM
,
24031 IX86_BUILTIN_VPROTW_IMM
,
24032 IX86_BUILTIN_VPROTD_IMM
,
24033 IX86_BUILTIN_VPROTQ_IMM
,
24035 IX86_BUILTIN_VPSHLB
,
24036 IX86_BUILTIN_VPSHLW
,
24037 IX86_BUILTIN_VPSHLD
,
24038 IX86_BUILTIN_VPSHLQ
,
24039 IX86_BUILTIN_VPSHAB
,
24040 IX86_BUILTIN_VPSHAW
,
24041 IX86_BUILTIN_VPSHAD
,
24042 IX86_BUILTIN_VPSHAQ
,
24044 IX86_BUILTIN_VFRCZSS
,
24045 IX86_BUILTIN_VFRCZSD
,
24046 IX86_BUILTIN_VFRCZPS
,
24047 IX86_BUILTIN_VFRCZPD
,
24048 IX86_BUILTIN_VFRCZPS256
,
24049 IX86_BUILTIN_VFRCZPD256
,
24051 IX86_BUILTIN_VPCOMEQUB
,
24052 IX86_BUILTIN_VPCOMNEUB
,
24053 IX86_BUILTIN_VPCOMLTUB
,
24054 IX86_BUILTIN_VPCOMLEUB
,
24055 IX86_BUILTIN_VPCOMGTUB
,
24056 IX86_BUILTIN_VPCOMGEUB
,
24057 IX86_BUILTIN_VPCOMFALSEUB
,
24058 IX86_BUILTIN_VPCOMTRUEUB
,
24060 IX86_BUILTIN_VPCOMEQUW
,
24061 IX86_BUILTIN_VPCOMNEUW
,
24062 IX86_BUILTIN_VPCOMLTUW
,
24063 IX86_BUILTIN_VPCOMLEUW
,
24064 IX86_BUILTIN_VPCOMGTUW
,
24065 IX86_BUILTIN_VPCOMGEUW
,
24066 IX86_BUILTIN_VPCOMFALSEUW
,
24067 IX86_BUILTIN_VPCOMTRUEUW
,
24069 IX86_BUILTIN_VPCOMEQUD
,
24070 IX86_BUILTIN_VPCOMNEUD
,
24071 IX86_BUILTIN_VPCOMLTUD
,
24072 IX86_BUILTIN_VPCOMLEUD
,
24073 IX86_BUILTIN_VPCOMGTUD
,
24074 IX86_BUILTIN_VPCOMGEUD
,
24075 IX86_BUILTIN_VPCOMFALSEUD
,
24076 IX86_BUILTIN_VPCOMTRUEUD
,
24078 IX86_BUILTIN_VPCOMEQUQ
,
24079 IX86_BUILTIN_VPCOMNEUQ
,
24080 IX86_BUILTIN_VPCOMLTUQ
,
24081 IX86_BUILTIN_VPCOMLEUQ
,
24082 IX86_BUILTIN_VPCOMGTUQ
,
24083 IX86_BUILTIN_VPCOMGEUQ
,
24084 IX86_BUILTIN_VPCOMFALSEUQ
,
24085 IX86_BUILTIN_VPCOMTRUEUQ
,
24087 IX86_BUILTIN_VPCOMEQB
,
24088 IX86_BUILTIN_VPCOMNEB
,
24089 IX86_BUILTIN_VPCOMLTB
,
24090 IX86_BUILTIN_VPCOMLEB
,
24091 IX86_BUILTIN_VPCOMGTB
,
24092 IX86_BUILTIN_VPCOMGEB
,
24093 IX86_BUILTIN_VPCOMFALSEB
,
24094 IX86_BUILTIN_VPCOMTRUEB
,
24096 IX86_BUILTIN_VPCOMEQW
,
24097 IX86_BUILTIN_VPCOMNEW
,
24098 IX86_BUILTIN_VPCOMLTW
,
24099 IX86_BUILTIN_VPCOMLEW
,
24100 IX86_BUILTIN_VPCOMGTW
,
24101 IX86_BUILTIN_VPCOMGEW
,
24102 IX86_BUILTIN_VPCOMFALSEW
,
24103 IX86_BUILTIN_VPCOMTRUEW
,
24105 IX86_BUILTIN_VPCOMEQD
,
24106 IX86_BUILTIN_VPCOMNED
,
24107 IX86_BUILTIN_VPCOMLTD
,
24108 IX86_BUILTIN_VPCOMLED
,
24109 IX86_BUILTIN_VPCOMGTD
,
24110 IX86_BUILTIN_VPCOMGED
,
24111 IX86_BUILTIN_VPCOMFALSED
,
24112 IX86_BUILTIN_VPCOMTRUED
,
24114 IX86_BUILTIN_VPCOMEQQ
,
24115 IX86_BUILTIN_VPCOMNEQ
,
24116 IX86_BUILTIN_VPCOMLTQ
,
24117 IX86_BUILTIN_VPCOMLEQ
,
24118 IX86_BUILTIN_VPCOMGTQ
,
24119 IX86_BUILTIN_VPCOMGEQ
,
24120 IX86_BUILTIN_VPCOMFALSEQ
,
24121 IX86_BUILTIN_VPCOMTRUEQ
,
24123 /* LWP instructions. */
24124 IX86_BUILTIN_LLWPCB
,
24125 IX86_BUILTIN_SLWPCB
,
24126 IX86_BUILTIN_LWPVAL32
,
24127 IX86_BUILTIN_LWPVAL64
,
24128 IX86_BUILTIN_LWPINS32
,
24129 IX86_BUILTIN_LWPINS64
,
24133 /* BMI instructions. */
24134 IX86_BUILTIN_BEXTR32
,
24135 IX86_BUILTIN_BEXTR64
,
24138 /* TBM instructions. */
24139 IX86_BUILTIN_BEXTRI32
,
24140 IX86_BUILTIN_BEXTRI64
,
24143 /* FSGSBASE instructions. */
24144 IX86_BUILTIN_RDFSBASE32
,
24145 IX86_BUILTIN_RDFSBASE64
,
24146 IX86_BUILTIN_RDGSBASE32
,
24147 IX86_BUILTIN_RDGSBASE64
,
24148 IX86_BUILTIN_WRFSBASE32
,
24149 IX86_BUILTIN_WRFSBASE64
,
24150 IX86_BUILTIN_WRGSBASE32
,
24151 IX86_BUILTIN_WRGSBASE64
,
24153 /* RDRND instructions. */
24154 IX86_BUILTIN_RDRAND16_STEP
,
24155 IX86_BUILTIN_RDRAND32_STEP
,
24156 IX86_BUILTIN_RDRAND64_STEP
,
24158 /* F16C instructions. */
24159 IX86_BUILTIN_CVTPH2PS
,
24160 IX86_BUILTIN_CVTPH2PS256
,
24161 IX86_BUILTIN_CVTPS2PH
,
24162 IX86_BUILTIN_CVTPS2PH256
,
24167 /* Table for the ix86 builtin decls. */
24168 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
24170 /* Table of all of the builtin functions that are possible with different ISA's
24171 but are waiting to be built until a function is declared to use that
24173 struct builtin_isa
{
24174 const char *name
; /* function name */
24175 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
24176 int isa
; /* isa_flags this builtin is defined for */
24177 bool const_p
; /* true if the declaration is constant */
24178 bool set_and_not_built_p
;
24181 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
24184 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
24185 of which isa_flags to use in the ix86_builtins_isa array. Stores the
24186 function decl in the ix86_builtins array. Returns the function decl or
24187 NULL_TREE, if the builtin was not added.
24189 If the front end has a special hook for builtin functions, delay adding
24190 builtin functions that aren't in the current ISA until the ISA is changed
24191 with function specific optimization. Doing so, can save about 300K for the
24192 default compiler. When the builtin is expanded, check at that time whether
24195 If the front end doesn't have a special hook, record all builtins, even if
24196 it isn't an instruction set in the current ISA in case the user uses
24197 function specific options for a different ISA, so that we don't get scope
24198 errors if a builtin is added in the middle of a function scope. */
24201 def_builtin (int mask
, const char *name
, enum ix86_builtin_func_type tcode
,
24202 enum ix86_builtins code
)
24204 tree decl
= NULL_TREE
;
24206 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
24208 ix86_builtins_isa
[(int) code
].isa
= mask
;
24210 mask
&= ~OPTION_MASK_ISA_64BIT
;
24212 || (mask
& ix86_isa_flags
) != 0
24213 || (lang_hooks
.builtin_function
24214 == lang_hooks
.builtin_function_ext_scope
))
24217 tree type
= ix86_get_builtin_func_type (tcode
);
24218 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
24220 ix86_builtins
[(int) code
] = decl
;
24221 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
24225 ix86_builtins
[(int) code
] = NULL_TREE
;
24226 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
24227 ix86_builtins_isa
[(int) code
].name
= name
;
24228 ix86_builtins_isa
[(int) code
].const_p
= false;
24229 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
24236 /* Like def_builtin, but also marks the function decl "const". */
24239 def_builtin_const (int mask
, const char *name
,
24240 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
24242 tree decl
= def_builtin (mask
, name
, tcode
, code
);
24244 TREE_READONLY (decl
) = 1;
24246 ix86_builtins_isa
[(int) code
].const_p
= true;
24251 /* Add any new builtin functions for a given ISA that may not have been
24252 declared. This saves a bit of space compared to adding all of the
24253 declarations to the tree, even if we didn't use them. */
24256 ix86_add_new_builtins (int isa
)
24260 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
24262 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
24263 && ix86_builtins_isa
[i
].set_and_not_built_p
)
24267 /* Don't define the builtin again. */
24268 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
24270 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
24271 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
24272 type
, i
, BUILT_IN_MD
, NULL
,
24275 ix86_builtins
[i
] = decl
;
24276 if (ix86_builtins_isa
[i
].const_p
)
24277 TREE_READONLY (decl
) = 1;
24282 /* Bits for builtin_description.flag. */
24284 /* Set when we don't support the comparison natively, and should
24285 swap_comparison in order to support it. */
24286 #define BUILTIN_DESC_SWAP_OPERANDS 1
24288 struct builtin_description
24290 const unsigned int mask
;
24291 const enum insn_code icode
;
24292 const char *const name
;
24293 const enum ix86_builtins code
;
24294 const enum rtx_code comparison
;
24298 static const struct builtin_description bdesc_comi
[] =
24300 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
24301 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
24302 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
24303 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
24304 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
24305 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
24306 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
24307 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
24308 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
24309 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
24310 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
24311 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
24312 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
24313 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
24314 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
24315 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
24316 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
24317 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
24318 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
24319 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
24320 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
24321 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
24322 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
24323 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
24326 static const struct builtin_description bdesc_pcmpestr
[] =
24329 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
24330 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
24331 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
24332 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
24333 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
24334 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
24335 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
24338 static const struct builtin_description bdesc_pcmpistr
[] =
24341 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
24342 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
24343 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
24344 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
24345 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
24346 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
24347 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
24350 /* Special builtins with variable number of arguments. */
24351 static const struct builtin_description bdesc_special_args
[] =
24353 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdtsc
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
24354 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdtscp
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
24357 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
24360 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
24363 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
24364 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
24365 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
24367 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
24368 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
24369 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
24370 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
24372 /* SSE or 3DNow!A */
24373 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
24374 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntdi
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
24377 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
24378 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
24379 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
24380 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movdqu
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
24381 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
24382 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
24383 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntsi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
24384 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
24385 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movdqu
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
24387 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
24388 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
24391 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
24394 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
24397 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
24398 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
24401 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
24402 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
24404 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
24405 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
24406 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
24407 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
24408 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
24410 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
24411 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
24412 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
24413 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
24414 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movdqu256
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
24415 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movdqu256
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
24416 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
24418 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
24419 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
24420 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
24422 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DF
},
24423 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SF
},
24424 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DF
},
24425 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SF
},
24426 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DF_V2DF
},
24427 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SF_V4SF
},
24428 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DF_V4DF
},
24429 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SF_V8SF
},
24431 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
24432 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
24433 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
24434 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
24435 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
24436 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
24439 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
24440 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
24441 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
24442 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
24443 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
24444 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
24445 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
24446 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
24449 /* Builtins with variable number of arguments. */
24450 static const struct builtin_description bdesc_args
[] =
24452 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
24453 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
24454 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdpmc
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
24455 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
24456 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
24457 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
24458 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
24461 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24462 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24463 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24464 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24465 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24466 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24468 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24469 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24470 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24471 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24472 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24473 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24474 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24475 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24477 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24478 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24480 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24481 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24482 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24483 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24485 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24486 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24487 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24488 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24489 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24490 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24492 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24493 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24494 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24495 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24496 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24497 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24499 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
24500 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
24501 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
24503 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
24505 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
24506 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
24507 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
24508 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
24509 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
24510 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
24512 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
24513 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
24514 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
24515 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
24516 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
24517 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
24519 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
24520 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
24521 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
24522 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
24525 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
24526 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
24527 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
24528 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
24530 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24531 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24532 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24533 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
24534 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
24535 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
24536 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24537 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24538 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24539 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24540 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24541 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24542 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24543 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24544 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24547 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
24548 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
24549 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
24550 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
24551 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24552 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
24555 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
24556 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
24557 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
24558 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
24559 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
24560 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
24561 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
24562 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
24563 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
24564 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
24565 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
24566 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
24568 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
24570 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24571 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24572 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24573 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24574 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24575 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24576 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24577 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24579 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
24580 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
24581 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
24582 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
24583 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
24584 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
24585 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
24586 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
24587 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
24588 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
24589 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
24590 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
24591 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
24592 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
24593 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
24594 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
24595 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
24596 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
24597 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
24598 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
24599 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
24600 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
24602 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24603 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24604 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24605 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24607 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24608 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24609 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24610 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24612 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24614 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24615 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24616 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24617 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24618 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24620 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
24621 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
/* 64-bit-only cvtsi2ss taking a DImode integer source.  The flag field
   is cast to (int) like every other entry in this table (the field is
   declared int; the bare enum relies on implicit conversion and trips
   -Wc++-compat).  */
24622 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_DI
},
24624 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
24626 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
24627 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
24628 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
24630 /* SSE MMX or 3Dnow!A */
24631 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24632 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24633 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24635 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24636 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24637 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24638 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24640 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
24641 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
24643 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
/* SSE2 */
24646 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
24648 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DI
},
24649 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SI
},
24650 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_V2DI
},
24651 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_V4SI
},
24652 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_V8HI
},
24653 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
24654 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U
, UNKNOWN
, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI
},
24655 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U
, UNKNOWN
, (int) V4USI_FTYPE_V4USI_V4USI_V4USI
},
24656 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U
, UNKNOWN
, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI
},
24657 { OPTION_MASK_ISA_SSE2
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U
, UNKNOWN
, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI
},
24658 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DI
},
24659 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SI
},
24661 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
24662 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
24663 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
24664 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
24665 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2ps
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
24666 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtudq2ps
, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
24668 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
24669 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
24670 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
24671 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
24672 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
24674 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
24676 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
24677 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
24678 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
24679 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
24681 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
24682 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
24683 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttps2dq
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
24685 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24686 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24687 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24688 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24689 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24690 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24691 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24692 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24694 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
24695 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
24696 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
24697 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
24698 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
24699 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
24700 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
24701 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
24702 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
24703 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
24704 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
24705 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
24706 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
24707 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
24708 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
24709 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
24710 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
24711 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
24712 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
24713 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
24715 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24716 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24717 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24718 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24720 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24721 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24722 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24723 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24725 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24727 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24728 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24729 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24731 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
24733 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24734 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24735 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
24736 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
24737 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24738 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24739 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
24740 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
24742 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24743 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24744 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24745 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24746 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24747 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24748 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24749 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24751 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
/* Signed high-part 16-bit multiply (pmulhw).  Spacing of the flag
   field normalized to ", (int)" for consistency with the rest of the
   table.  */
24752 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24754 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
24755 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
24756 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
24757 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
24759 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24760 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24762 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24763 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24764 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
24765 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24766 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24767 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
24769 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24770 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24771 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24772 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24774 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24775 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24776 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
24777 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
24778 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24779 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24780 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
24781 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
24783 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
24784 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
24785 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
24787 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24788 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
24790 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
24791 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
24793 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
24795 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
24796 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
24797 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
24798 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
24800 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
24801 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
24802 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
24803 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
24804 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
24805 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
24806 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
24808 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
24809 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
24810 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
24811 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
24812 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
24813 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
24814 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
24816 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
24817 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
24818 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
24819 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
24821 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
24822 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
24823 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
24825 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
/* __float128 abs/copysign.  The name slot is 0 (NULL): these entries
   carry no "__builtin_ia32_*" spelling here -- presumably they are
   registered under other names by the builtin-definition code; confirm
   against the def_builtin calls that walk this table.  */
24827 { OPTION_MASK_ISA_SSE2
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
24828 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
/* movq128 expands via CODE_FOR_sse2_movq128, an SSE2-only insn (MOVQ
   xmm,xmm is introduced by SSE2), so the builtin must be gated on
   OPTION_MASK_ISA_SSE2 -- OPTION_MASK_ISA_SSE alone would let -msse
   code reach an SSE2 instruction.  */
24830 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
24833 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
24834 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
/* SSE3 */
24837 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
24838 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
24840 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24841 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24842 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24843 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
24844 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
24845 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
/* SSSE3 */
24848 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
24849 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
24850 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
24851 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
24852 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
24853 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
24855 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24856 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24857 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
24858 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24859 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24860 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24861 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24862 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24863 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
24864 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24865 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24866 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24867 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
24868 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
24869 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24870 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24871 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24872 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24873 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24874 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
24875 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24876 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
24877 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
24878 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
24881 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
24882 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
24885 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
24886 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
24887 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
24888 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
24889 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
24890 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
24891 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
24892 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
24893 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
24894 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
24896 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
24897 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
24898 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
24899 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
24900 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
24901 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
24902 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
24903 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
24904 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
24905 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
24906 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
24907 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
24908 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
24910 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
24911 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
24912 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24913 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
24914 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
24915 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24916 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
24917 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
24918 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
24919 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
24920 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
24921 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
24924 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
24925 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
24926 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
24927 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
24929 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
24930 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
24931 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
24934 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
24935 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
24936 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
24937 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
24938 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
24941 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
24942 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
24943 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
24944 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
24947 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
24948 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
24950 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
24951 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
24952 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
24953 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
24956 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
24959 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
24960 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
24961 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
24962 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
24963 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
24964 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
24965 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
24966 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
24967 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
24968 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
24969 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
24970 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
24971 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
24972 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
24973 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
24974 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
24975 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
24976 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
24977 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
24978 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
24979 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
24980 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
24981 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
24982 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
24983 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
24984 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
24986 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
24987 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
24988 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
24989 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
24991 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
24992 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
24993 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
24994 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
24995 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
24996 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
24997 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
24998 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpsdv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
24999 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpssv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
25000 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmppdv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
25001 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmppsv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
25002 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmppdv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
25003 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmppsv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
25004 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
25005 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
25006 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
25007 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtdq2pd256
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
25008 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtdq2ps256
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
25009 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
25010 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2dq256
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
25011 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
25012 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvttpd2dq256
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
25013 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
25014 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvttps2dq256
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
25015 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
25016 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
25017 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
25018 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
25019 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
25020 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
25021 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
25022 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
25023 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
25024 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
25026 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
25027 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
25028 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
25030 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
25031 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
25032 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
25033 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
25034 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
25036 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
25038 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
25039 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
25041 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
25042 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
25043 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
25044 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
25046 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
25047 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
25048 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
25049 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
25050 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
25051 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
25053 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
25054 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
25055 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
25056 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
25057 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
25058 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
25059 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
25060 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
25061 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
25062 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
25063 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
25064 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
25065 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
25066 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
25067 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
25069 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
25070 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
25072 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
25073 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
25075 { OPTION_MASK_ISA_ABM
, CODE_FOR_clzhi2_abm
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
25078 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
25079 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
25080 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
25083 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
25084 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
25087 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
25088 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
25089 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
25090 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
/* FMA4 and XOP.  Shorthand aliases mapping the multi-argument builtin
   signature names used in the bdesc_multi_arg table onto the
   ix86_builtin_func_type enumerators (V2DF_FTYPE_..., etc.).  The suffix
   encodes argument count and element type; "2" denotes the 256-bit
   variant, "_IMM" an immediate operand, "_CMP"/"_TF" comparison and
   test forms.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
25147 static const struct builtin_description bdesc_multi_arg
[] =
25149 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
25150 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
25151 UNKNOWN
, (int)MULTI_ARG_3_SF
},
25152 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
25153 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
25154 UNKNOWN
, (int)MULTI_ARG_3_DF
},
25156 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
25157 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
25158 UNKNOWN
, (int)MULTI_ARG_3_SF
},
25159 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
25160 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
25161 UNKNOWN
, (int)MULTI_ARG_3_DF
},
25162 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
25163 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
25164 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
25165 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
25166 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
25167 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
25169 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
25170 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
25171 UNKNOWN
, (int)MULTI_ARG_3_SF
},
25172 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
25173 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
25174 UNKNOWN
, (int)MULTI_ARG_3_DF
},
25175 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
25176 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
25177 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
25178 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
25179 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
25180 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
25182 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
25183 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
25184 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
25185 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
25186 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
25187 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
25188 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
25190 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
25191 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
25192 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
25193 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
25194 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
25195 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
25196 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
25198 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
25200 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
25201 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
25202 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
25203 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
25204 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
25205 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
25206 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
25207 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
25208 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
25209 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
25210 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
25211 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
25213 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
25214 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
25215 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
25216 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
25217 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
25218 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
25219 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
25220 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
25221 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_ashlv2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
25222 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_ashlv4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
25223 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_ashlv8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
25224 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_ashlv16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
25225 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_lshlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
25226 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_lshlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
25227 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_lshlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
25228 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_lshlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
25230 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
25231 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
25232 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
25233 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
25234 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
25235 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
25237 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
25238 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
25239 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
25240 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
25241 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
25242 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
25243 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
25244 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
25245 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
25246 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
25247 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
25248 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
25249 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
25250 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
25251 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
25253 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
25254 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
25255 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
25256 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
25257 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
25258 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
25259 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
25261 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
25262 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
25263 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
25264 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
25265 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
25266 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
25267 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
25269 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
25270 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
25271 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
25272 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
25273 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
25274 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
25275 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
25277 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
25278 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
25279 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
25280 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
25281 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
25282 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
25283 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
25285 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
25286 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
25287 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
25288 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
25289 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
25290 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
25291 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
25293 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
25294 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
25295 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
25296 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
25297 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
25298 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
25299 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
25301 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
25302 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
25303 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
25304 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
25305 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
25306 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
25307 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
25309 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
25310 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
25311 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
25312 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
25313 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
25314 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
25315 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
25317 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
25318 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
25319 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
25320 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
25321 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
25322 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
25323 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
25324 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
25326 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
25327 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
25328 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
25329 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
25330 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
25331 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
25332 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
25333 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
25335 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
25336 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
25337 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
25338 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
25342 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
25343 in the current target ISA to allow the user to compile particular modules
25344 with different target specific options that differ from the command line
25347 ix86_init_mmx_sse_builtins (void)
25349 const struct builtin_description
* d
;
25350 enum ix86_builtin_func_type ftype
;
25353 /* Add all special builtins with variable number of operands. */
25354 for (i
= 0, d
= bdesc_special_args
;
25355 i
< ARRAY_SIZE (bdesc_special_args
);
25361 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
25362 def_builtin (d
->mask
, d
->name
, ftype
, d
->code
);
25365 /* Add all builtins with variable number of operands. */
25366 for (i
= 0, d
= bdesc_args
;
25367 i
< ARRAY_SIZE (bdesc_args
);
25373 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
25374 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
25377 /* pcmpestr[im] insns. */
25378 for (i
= 0, d
= bdesc_pcmpestr
;
25379 i
< ARRAY_SIZE (bdesc_pcmpestr
);
25382 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
25383 ftype
= V16QI_FTYPE_V16QI_INT_V16QI_INT_INT
;
25385 ftype
= INT_FTYPE_V16QI_INT_V16QI_INT_INT
;
25386 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
25389 /* pcmpistr[im] insns. */
25390 for (i
= 0, d
= bdesc_pcmpistr
;
25391 i
< ARRAY_SIZE (bdesc_pcmpistr
);
25394 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
25395 ftype
= V16QI_FTYPE_V16QI_V16QI_INT
;
25397 ftype
= INT_FTYPE_V16QI_V16QI_INT
;
25398 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
25401 /* comi/ucomi insns. */
25402 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
25404 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
25405 ftype
= INT_FTYPE_V2DF_V2DF
;
25407 ftype
= INT_FTYPE_V4SF_V4SF
;
25408 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
25412 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
25413 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
25414 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
25415 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
25417 /* SSE or 3DNow!A */
25418 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
25419 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
25420 IX86_BUILTIN_MASKMOVQ
);
25423 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
25424 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
25426 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
25427 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
25428 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
25429 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
25432 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
25433 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
25434 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
25435 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
25438 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
25439 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
25440 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
25441 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
25442 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
25443 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
25444 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
25445 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
25446 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
25447 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
25448 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
25449 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
25452 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
25453 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
25456 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
25457 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
25458 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
25459 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
25460 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
25461 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
25462 IX86_BUILTIN_RDRAND64_STEP
);
25464 /* MMX access to the vec_init patterns. */
25465 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
25466 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
25468 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
25469 V4HI_FTYPE_HI_HI_HI_HI
,
25470 IX86_BUILTIN_VEC_INIT_V4HI
);
25472 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
25473 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
25474 IX86_BUILTIN_VEC_INIT_V8QI
);
25476 /* Access to the vec_extract patterns. */
25477 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
25478 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
25479 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
25480 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
25481 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
25482 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
25483 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
25484 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
25485 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
25486 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
25488 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
25489 "__builtin_ia32_vec_ext_v4hi",
25490 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
25492 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
25493 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
25495 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
25496 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
25498 /* Access to the vec_set patterns. */
25499 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
25500 "__builtin_ia32_vec_set_v2di",
25501 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
25503 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
25504 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
25506 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
25507 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
25509 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
25510 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
25512 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
25513 "__builtin_ia32_vec_set_v4hi",
25514 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
25516 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
25517 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
25519 /* Add FMA4 multi-arg argument instructions */
25520 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
25525 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
25526 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
25530 /* Internal method for ix86_init_builtins. */
25533 ix86_init_builtins_va_builtins_abi (void)
25535 tree ms_va_ref
, sysv_va_ref
;
25536 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
25537 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
25538 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
25539 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
25543 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
25544 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
25545 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
25547 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
25550 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
25551 fnvoid_va_start_ms
=
25552 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
25553 fnvoid_va_end_sysv
=
25554 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
25555 fnvoid_va_start_sysv
=
25556 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
25558 fnvoid_va_copy_ms
=
25559 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
25561 fnvoid_va_copy_sysv
=
25562 build_function_type_list (void_type_node
, sysv_va_ref
,
25563 sysv_va_ref
, NULL_TREE
);
25565 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
25566 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
25567 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
25568 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
25569 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
25570 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
25571 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
25572 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
25573 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
25574 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
25575 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
25576 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
25580 ix86_init_builtin_types (void)
25582 tree float128_type_node
, float80_type_node
;
25584 /* The __float80 type. */
25585 float80_type_node
= long_double_type_node
;
25586 if (TYPE_MODE (float80_type_node
) != XFmode
)
25588 /* The __float80 type. */
25589 float80_type_node
= make_node (REAL_TYPE
);
25591 TYPE_PRECISION (float80_type_node
) = 80;
25592 layout_type (float80_type_node
);
25594 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
25596 /* The __float128 type. */
25597 float128_type_node
= make_node (REAL_TYPE
);
25598 TYPE_PRECISION (float128_type_node
) = 128;
25599 layout_type (float128_type_node
);
25600 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
25602 /* This macro is built by i386-builtin-types.awk. */
25603 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
25607 ix86_init_builtins (void)
25611 ix86_init_builtin_types ();
25613 /* TFmode support builtins. */
25614 def_builtin_const (0, "__builtin_infq",
25615 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
25616 def_builtin_const (0, "__builtin_huge_valq",
25617 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
25619 /* We will expand them to normal call if SSE2 isn't available since
25620 they are used by libgcc. */
25621 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
25622 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
25623 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
25624 TREE_READONLY (t
) = 1;
25625 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
25627 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
25628 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
25629 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
25630 TREE_READONLY (t
) = 1;
25631 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
25633 ix86_init_mmx_sse_builtins ();
25636 ix86_init_builtins_va_builtins_abi ();
25638 #ifdef SUBTARGET_INIT_BUILTINS
25639 SUBTARGET_INIT_BUILTINS
;
25643 /* Return the ix86 builtin for CODE. */
25646 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
25648 if (code
>= IX86_BUILTIN_MAX
)
25649 return error_mark_node
;
25651 return ix86_builtins
[code
];
25654 /* Errors in the source file can cause expand_expr to return const0_rtx
25655 where we expect a vector. To avoid crashing, use one of the vector
25656 clear instructions. */
25658 safe_vector_operand (rtx x
, enum machine_mode mode
)
25660 if (x
== const0_rtx
)
25661 x
= CONST0_RTX (mode
);
25665 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25668 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
25671 tree arg0
= CALL_EXPR_ARG (exp
, 0);
25672 tree arg1
= CALL_EXPR_ARG (exp
, 1);
25673 rtx op0
= expand_normal (arg0
);
25674 rtx op1
= expand_normal (arg1
);
25675 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
25676 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
25677 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
25679 if (VECTOR_MODE_P (mode0
))
25680 op0
= safe_vector_operand (op0
, mode0
);
25681 if (VECTOR_MODE_P (mode1
))
25682 op1
= safe_vector_operand (op1
, mode1
);
25684 if (optimize
|| !target
25685 || GET_MODE (target
) != tmode
25686 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
25687 target
= gen_reg_rtx (tmode
);
25689 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
25691 rtx x
= gen_reg_rtx (V4SImode
);
25692 emit_insn (gen_sse2_loadd (x
, op1
));
25693 op1
= gen_lowpart (TImode
, x
);
25696 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
25697 op0
= copy_to_mode_reg (mode0
, op0
);
25698 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
25699 op1
= copy_to_mode_reg (mode1
, op1
);
25701 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25710 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
25713 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
25714 enum ix86_builtin_func_type m_type
,
25715 enum rtx_code sub_code
)
25720 bool comparison_p
= false;
25722 bool last_arg_constant
= false;
25723 int num_memory
= 0;
25726 enum machine_mode mode
;
25729 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
25733 case MULTI_ARG_4_DF2_DI_I
:
25734 case MULTI_ARG_4_DF2_DI_I1
:
25735 case MULTI_ARG_4_SF2_SI_I
:
25736 case MULTI_ARG_4_SF2_SI_I1
:
25738 last_arg_constant
= true;
25741 case MULTI_ARG_3_SF
:
25742 case MULTI_ARG_3_DF
:
25743 case MULTI_ARG_3_SF2
:
25744 case MULTI_ARG_3_DF2
:
25745 case MULTI_ARG_3_DI
:
25746 case MULTI_ARG_3_SI
:
25747 case MULTI_ARG_3_SI_DI
:
25748 case MULTI_ARG_3_HI
:
25749 case MULTI_ARG_3_HI_SI
:
25750 case MULTI_ARG_3_QI
:
25751 case MULTI_ARG_3_DI2
:
25752 case MULTI_ARG_3_SI2
:
25753 case MULTI_ARG_3_HI2
:
25754 case MULTI_ARG_3_QI2
:
25758 case MULTI_ARG_2_SF
:
25759 case MULTI_ARG_2_DF
:
25760 case MULTI_ARG_2_DI
:
25761 case MULTI_ARG_2_SI
:
25762 case MULTI_ARG_2_HI
:
25763 case MULTI_ARG_2_QI
:
25767 case MULTI_ARG_2_DI_IMM
:
25768 case MULTI_ARG_2_SI_IMM
:
25769 case MULTI_ARG_2_HI_IMM
:
25770 case MULTI_ARG_2_QI_IMM
:
25772 last_arg_constant
= true;
25775 case MULTI_ARG_1_SF
:
25776 case MULTI_ARG_1_DF
:
25777 case MULTI_ARG_1_SF2
:
25778 case MULTI_ARG_1_DF2
:
25779 case MULTI_ARG_1_DI
:
25780 case MULTI_ARG_1_SI
:
25781 case MULTI_ARG_1_HI
:
25782 case MULTI_ARG_1_QI
:
25783 case MULTI_ARG_1_SI_DI
:
25784 case MULTI_ARG_1_HI_DI
:
25785 case MULTI_ARG_1_HI_SI
:
25786 case MULTI_ARG_1_QI_DI
:
25787 case MULTI_ARG_1_QI_SI
:
25788 case MULTI_ARG_1_QI_HI
:
25792 case MULTI_ARG_2_DI_CMP
:
25793 case MULTI_ARG_2_SI_CMP
:
25794 case MULTI_ARG_2_HI_CMP
:
25795 case MULTI_ARG_2_QI_CMP
:
25797 comparison_p
= true;
25800 case MULTI_ARG_2_SF_TF
:
25801 case MULTI_ARG_2_DF_TF
:
25802 case MULTI_ARG_2_DI_TF
:
25803 case MULTI_ARG_2_SI_TF
:
25804 case MULTI_ARG_2_HI_TF
:
25805 case MULTI_ARG_2_QI_TF
:
25811 gcc_unreachable ();
25814 if (optimize
|| !target
25815 || GET_MODE (target
) != tmode
25816 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
25817 target
= gen_reg_rtx (tmode
);
25819 gcc_assert (nargs
<= 4);
25821 for (i
= 0; i
< nargs
; i
++)
25823 tree arg
= CALL_EXPR_ARG (exp
, i
);
25824 rtx op
= expand_normal (arg
);
25825 int adjust
= (comparison_p
) ? 1 : 0;
25826 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
25828 if (last_arg_constant
&& i
== nargs
-1)
25830 if (!CONST_INT_P (op
))
25832 error ("last argument must be an immediate");
25833 return gen_reg_rtx (tmode
);
25838 if (VECTOR_MODE_P (mode
))
25839 op
= safe_vector_operand (op
, mode
);
25841 /* If we aren't optimizing, only allow one memory operand to be
25843 if (memory_operand (op
, mode
))
25846 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
25849 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
25851 op
= force_reg (mode
, op
);
25855 args
[i
].mode
= mode
;
25861 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
25866 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
25867 GEN_INT ((int)sub_code
));
25868 else if (! comparison_p
)
25869 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
25872 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
25876 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
25881 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
25885 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
25889 gcc_unreachable ();
25899 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
25900 insns with vec_merge. */
25903 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
25907 tree arg0
= CALL_EXPR_ARG (exp
, 0);
25908 rtx op1
, op0
= expand_normal (arg0
);
25909 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
25910 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
25912 if (optimize
|| !target
25913 || GET_MODE (target
) != tmode
25914 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
25915 target
= gen_reg_rtx (tmode
);
25917 if (VECTOR_MODE_P (mode0
))
25918 op0
= safe_vector_operand (op0
, mode0
);
25920 if ((optimize
&& !register_operand (op0
, mode0
))
25921 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
25922 op0
= copy_to_mode_reg (mode0
, op0
);
25925 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
25926 op1
= copy_to_mode_reg (mode0
, op1
);
25928 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25935 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
25938 ix86_expand_sse_compare (const struct builtin_description
*d
,
25939 tree exp
, rtx target
, bool swap
)
25942 tree arg0
= CALL_EXPR_ARG (exp
, 0);
25943 tree arg1
= CALL_EXPR_ARG (exp
, 1);
25944 rtx op0
= expand_normal (arg0
);
25945 rtx op1
= expand_normal (arg1
);
25947 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
25948 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
25949 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
25950 enum rtx_code comparison
= d
->comparison
;
25952 if (VECTOR_MODE_P (mode0
))
25953 op0
= safe_vector_operand (op0
, mode0
);
25954 if (VECTOR_MODE_P (mode1
))
25955 op1
= safe_vector_operand (op1
, mode1
);
25957 /* Swap operands if we have a comparison that isn't available in
25961 rtx tmp
= gen_reg_rtx (mode1
);
25962 emit_move_insn (tmp
, op1
);
25967 if (optimize
|| !target
25968 || GET_MODE (target
) != tmode
25969 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
25970 target
= gen_reg_rtx (tmode
);
25972 if ((optimize
&& !register_operand (op0
, mode0
))
25973 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
25974 op0
= copy_to_mode_reg (mode0
, op0
);
25975 if ((optimize
&& !register_operand (op1
, mode1
))
25976 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
25977 op1
= copy_to_mode_reg (mode1
, op1
);
25979 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
25980 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
25987 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
25990 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
25994 tree arg0
= CALL_EXPR_ARG (exp
, 0);
25995 tree arg1
= CALL_EXPR_ARG (exp
, 1);
25996 rtx op0
= expand_normal (arg0
);
25997 rtx op1
= expand_normal (arg1
);
25998 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
25999 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
26000 enum rtx_code comparison
= d
->comparison
;
26002 if (VECTOR_MODE_P (mode0
))
26003 op0
= safe_vector_operand (op0
, mode0
);
26004 if (VECTOR_MODE_P (mode1
))
26005 op1
= safe_vector_operand (op1
, mode1
);
26007 /* Swap operands if we have a comparison that isn't available in
26009 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
26016 target
= gen_reg_rtx (SImode
);
26017 emit_move_insn (target
, const0_rtx
);
26018 target
= gen_rtx_SUBREG (QImode
, target
, 0);
26020 if ((optimize
&& !register_operand (op0
, mode0
))
26021 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
26022 op0
= copy_to_mode_reg (mode0
, op0
);
26023 if ((optimize
&& !register_operand (op1
, mode1
))
26024 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
26025 op1
= copy_to_mode_reg (mode1
, op1
);
26027 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
26031 emit_insn (gen_rtx_SET (VOIDmode
,
26032 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
26033 gen_rtx_fmt_ee (comparison
, QImode
,
26037 return SUBREG_REG (target
);
26040 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
26043 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
26047 tree arg0
= CALL_EXPR_ARG (exp
, 0);
26048 tree arg1
= CALL_EXPR_ARG (exp
, 1);
26049 rtx op0
= expand_normal (arg0
);
26050 rtx op1
= expand_normal (arg1
);
26051 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
26052 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
26053 enum rtx_code comparison
= d
->comparison
;
26055 if (VECTOR_MODE_P (mode0
))
26056 op0
= safe_vector_operand (op0
, mode0
);
26057 if (VECTOR_MODE_P (mode1
))
26058 op1
= safe_vector_operand (op1
, mode1
);
26060 target
= gen_reg_rtx (SImode
);
26061 emit_move_insn (target
, const0_rtx
);
26062 target
= gen_rtx_SUBREG (QImode
, target
, 0);
26064 if ((optimize
&& !register_operand (op0
, mode0
))
26065 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
26066 op0
= copy_to_mode_reg (mode0
, op0
);
26067 if ((optimize
&& !register_operand (op1
, mode1
))
26068 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
26069 op1
= copy_to_mode_reg (mode1
, op1
);
26071 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
26075 emit_insn (gen_rtx_SET (VOIDmode
,
26076 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
26077 gen_rtx_fmt_ee (comparison
, QImode
,
26081 return SUBREG_REG (target
);
26084 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
26087 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
26088 tree exp
, rtx target
)
26091 tree arg0
= CALL_EXPR_ARG (exp
, 0);
26092 tree arg1
= CALL_EXPR_ARG (exp
, 1);
26093 tree arg2
= CALL_EXPR_ARG (exp
, 2);
26094 tree arg3
= CALL_EXPR_ARG (exp
, 3);
26095 tree arg4
= CALL_EXPR_ARG (exp
, 4);
26096 rtx scratch0
, scratch1
;
26097 rtx op0
= expand_normal (arg0
);
26098 rtx op1
= expand_normal (arg1
);
26099 rtx op2
= expand_normal (arg2
);
26100 rtx op3
= expand_normal (arg3
);
26101 rtx op4
= expand_normal (arg4
);
26102 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
26104 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
26105 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
26106 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
26107 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
26108 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
26109 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
26110 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
26112 if (VECTOR_MODE_P (modev2
))
26113 op0
= safe_vector_operand (op0
, modev2
);
26114 if (VECTOR_MODE_P (modev4
))
26115 op2
= safe_vector_operand (op2
, modev4
);
26117 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
26118 op0
= copy_to_mode_reg (modev2
, op0
);
26119 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
26120 op1
= copy_to_mode_reg (modei3
, op1
);
26121 if ((optimize
&& !register_operand (op2
, modev4
))
26122 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
26123 op2
= copy_to_mode_reg (modev4
, op2
);
26124 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
26125 op3
= copy_to_mode_reg (modei5
, op3
);
26127 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
26129 error ("the fifth argument must be a 8-bit immediate");
26133 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
26135 if (optimize
|| !target
26136 || GET_MODE (target
) != tmode0
26137 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
26138 target
= gen_reg_rtx (tmode0
);
26140 scratch1
= gen_reg_rtx (tmode1
);
26142 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
26144 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
26146 if (optimize
|| !target
26147 || GET_MODE (target
) != tmode1
26148 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
26149 target
= gen_reg_rtx (tmode1
);
26151 scratch0
= gen_reg_rtx (tmode0
);
26153 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
26157 gcc_assert (d
->flag
);
26159 scratch0
= gen_reg_rtx (tmode0
);
26160 scratch1
= gen_reg_rtx (tmode1
);
26162 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
26172 target
= gen_reg_rtx (SImode
);
26173 emit_move_insn (target
, const0_rtx
);
26174 target
= gen_rtx_SUBREG (QImode
, target
, 0);
26177 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
26178 gen_rtx_fmt_ee (EQ
, QImode
,
26179 gen_rtx_REG ((enum machine_mode
) d
->flag
,
26182 return SUBREG_REG (target
);
26189 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26192 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
26193 tree exp
, rtx target
)
26196 tree arg0
= CALL_EXPR_ARG (exp
, 0);
26197 tree arg1
= CALL_EXPR_ARG (exp
, 1);
26198 tree arg2
= CALL_EXPR_ARG (exp
, 2);
26199 rtx scratch0
, scratch1
;
26200 rtx op0
= expand_normal (arg0
);
26201 rtx op1
= expand_normal (arg1
);
26202 rtx op2
= expand_normal (arg2
);
26203 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
26205 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
26206 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
26207 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
26208 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
26209 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
26211 if (VECTOR_MODE_P (modev2
))
26212 op0
= safe_vector_operand (op0
, modev2
);
26213 if (VECTOR_MODE_P (modev3
))
26214 op1
= safe_vector_operand (op1
, modev3
);
26216 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
26217 op0
= copy_to_mode_reg (modev2
, op0
);
26218 if ((optimize
&& !register_operand (op1
, modev3
))
26219 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
26220 op1
= copy_to_mode_reg (modev3
, op1
);
26222 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
26224 error ("the third argument must be a 8-bit immediate");
26228 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
26230 if (optimize
|| !target
26231 || GET_MODE (target
) != tmode0
26232 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
26233 target
= gen_reg_rtx (tmode0
);
26235 scratch1
= gen_reg_rtx (tmode1
);
26237 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
26239 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
26241 if (optimize
|| !target
26242 || GET_MODE (target
) != tmode1
26243 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
26244 target
= gen_reg_rtx (tmode1
);
26246 scratch0
= gen_reg_rtx (tmode0
);
26248 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
26252 gcc_assert (d
->flag
);
26254 scratch0
= gen_reg_rtx (tmode0
);
26255 scratch1
= gen_reg_rtx (tmode1
);
26257 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
26267 target
= gen_reg_rtx (SImode
);
26268 emit_move_insn (target
, const0_rtx
);
26269 target
= gen_rtx_SUBREG (QImode
, target
, 0);
26272 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
26273 gen_rtx_fmt_ee (EQ
, QImode
,
26274 gen_rtx_REG ((enum machine_mode
) d
->flag
,
26277 return SUBREG_REG (target
);
26283 /* Subroutine of ix86_expand_builtin to take care of insns with
26284 variable number of operands. */
26287 ix86_expand_args_builtin (const struct builtin_description
*d
,
26288 tree exp
, rtx target
)
26290 rtx pat
, real_target
;
26291 unsigned int i
, nargs
;
26292 unsigned int nargs_constant
= 0;
26293 int num_memory
= 0;
26297 enum machine_mode mode
;
26299 bool last_arg_count
= false;
26300 enum insn_code icode
= d
->icode
;
26301 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
26302 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
26303 enum machine_mode rmode
= VOIDmode
;
26305 enum rtx_code comparison
= d
->comparison
;
26307 switch ((enum ix86_builtin_func_type
) d
->flag
)
26309 case INT_FTYPE_V8SF_V8SF_PTEST
:
26310 case INT_FTYPE_V4DI_V4DI_PTEST
:
26311 case INT_FTYPE_V4DF_V4DF_PTEST
:
26312 case INT_FTYPE_V4SF_V4SF_PTEST
:
26313 case INT_FTYPE_V2DI_V2DI_PTEST
:
26314 case INT_FTYPE_V2DF_V2DF_PTEST
:
26315 return ix86_expand_sse_ptest (d
, exp
, target
);
26316 case FLOAT128_FTYPE_FLOAT128
:
26317 case FLOAT_FTYPE_FLOAT
:
26318 case INT_FTYPE_INT
:
26319 case UINT64_FTYPE_INT
:
26320 case UINT16_FTYPE_UINT16
:
26321 case INT64_FTYPE_INT64
:
26322 case INT64_FTYPE_V4SF
:
26323 case INT64_FTYPE_V2DF
:
26324 case INT_FTYPE_V16QI
:
26325 case INT_FTYPE_V8QI
:
26326 case INT_FTYPE_V8SF
:
26327 case INT_FTYPE_V4DF
:
26328 case INT_FTYPE_V4SF
:
26329 case INT_FTYPE_V2DF
:
26330 case V16QI_FTYPE_V16QI
:
26331 case V8SI_FTYPE_V8SF
:
26332 case V8SI_FTYPE_V4SI
:
26333 case V8HI_FTYPE_V8HI
:
26334 case V8HI_FTYPE_V16QI
:
26335 case V8QI_FTYPE_V8QI
:
26336 case V8SF_FTYPE_V8SF
:
26337 case V8SF_FTYPE_V8SI
:
26338 case V8SF_FTYPE_V4SF
:
26339 case V8SF_FTYPE_V8HI
:
26340 case V4SI_FTYPE_V4SI
:
26341 case V4SI_FTYPE_V16QI
:
26342 case V4SI_FTYPE_V4SF
:
26343 case V4SI_FTYPE_V8SI
:
26344 case V4SI_FTYPE_V8HI
:
26345 case V4SI_FTYPE_V4DF
:
26346 case V4SI_FTYPE_V2DF
:
26347 case V4HI_FTYPE_V4HI
:
26348 case V4DF_FTYPE_V4DF
:
26349 case V4DF_FTYPE_V4SI
:
26350 case V4DF_FTYPE_V4SF
:
26351 case V4DF_FTYPE_V2DF
:
26352 case V4SF_FTYPE_V4SF
:
26353 case V4SF_FTYPE_V4SI
:
26354 case V4SF_FTYPE_V8SF
:
26355 case V4SF_FTYPE_V4DF
:
26356 case V4SF_FTYPE_V8HI
:
26357 case V4SF_FTYPE_V2DF
:
26358 case V2DI_FTYPE_V2DI
:
26359 case V2DI_FTYPE_V16QI
:
26360 case V2DI_FTYPE_V8HI
:
26361 case V2DI_FTYPE_V4SI
:
26362 case V2DF_FTYPE_V2DF
:
26363 case V2DF_FTYPE_V4SI
:
26364 case V2DF_FTYPE_V4DF
:
26365 case V2DF_FTYPE_V4SF
:
26366 case V2DF_FTYPE_V2SI
:
26367 case V2SI_FTYPE_V2SI
:
26368 case V2SI_FTYPE_V4SF
:
26369 case V2SI_FTYPE_V2SF
:
26370 case V2SI_FTYPE_V2DF
:
26371 case V2SF_FTYPE_V2SF
:
26372 case V2SF_FTYPE_V2SI
:
26375 case V4SF_FTYPE_V4SF_VEC_MERGE
:
26376 case V2DF_FTYPE_V2DF_VEC_MERGE
:
26377 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
26378 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
26379 case V16QI_FTYPE_V16QI_V16QI
:
26380 case V16QI_FTYPE_V8HI_V8HI
:
26381 case V8QI_FTYPE_V8QI_V8QI
:
26382 case V8QI_FTYPE_V4HI_V4HI
:
26383 case V8HI_FTYPE_V8HI_V8HI
:
26384 case V8HI_FTYPE_V16QI_V16QI
:
26385 case V8HI_FTYPE_V4SI_V4SI
:
26386 case V8SF_FTYPE_V8SF_V8SF
:
26387 case V8SF_FTYPE_V8SF_V8SI
:
26388 case V4SI_FTYPE_V4SI_V4SI
:
26389 case V4SI_FTYPE_V8HI_V8HI
:
26390 case V4SI_FTYPE_V4SF_V4SF
:
26391 case V4SI_FTYPE_V2DF_V2DF
:
26392 case V4HI_FTYPE_V4HI_V4HI
:
26393 case V4HI_FTYPE_V8QI_V8QI
:
26394 case V4HI_FTYPE_V2SI_V2SI
:
26395 case V4DF_FTYPE_V4DF_V4DF
:
26396 case V4DF_FTYPE_V4DF_V4DI
:
26397 case V4SF_FTYPE_V4SF_V4SF
:
26398 case V4SF_FTYPE_V4SF_V4SI
:
26399 case V4SF_FTYPE_V4SF_V2SI
:
26400 case V4SF_FTYPE_V4SF_V2DF
:
26401 case V4SF_FTYPE_V4SF_DI
:
26402 case V4SF_FTYPE_V4SF_SI
:
26403 case V2DI_FTYPE_V2DI_V2DI
:
26404 case V2DI_FTYPE_V16QI_V16QI
:
26405 case V2DI_FTYPE_V4SI_V4SI
:
26406 case V2DI_FTYPE_V2DI_V16QI
:
26407 case V2DI_FTYPE_V2DF_V2DF
:
26408 case V2SI_FTYPE_V2SI_V2SI
:
26409 case V2SI_FTYPE_V4HI_V4HI
:
26410 case V2SI_FTYPE_V2SF_V2SF
:
26411 case V2DF_FTYPE_V2DF_V2DF
:
26412 case V2DF_FTYPE_V2DF_V4SF
:
26413 case V2DF_FTYPE_V2DF_V2DI
:
26414 case V2DF_FTYPE_V2DF_DI
:
26415 case V2DF_FTYPE_V2DF_SI
:
26416 case V2SF_FTYPE_V2SF_V2SF
:
26417 case V1DI_FTYPE_V1DI_V1DI
:
26418 case V1DI_FTYPE_V8QI_V8QI
:
26419 case V1DI_FTYPE_V2SI_V2SI
:
26420 if (comparison
== UNKNOWN
)
26421 return ix86_expand_binop_builtin (icode
, exp
, target
);
26424 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
26425 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
26426 gcc_assert (comparison
!= UNKNOWN
);
26430 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
26431 case V8HI_FTYPE_V8HI_SI_COUNT
:
26432 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
26433 case V4SI_FTYPE_V4SI_SI_COUNT
:
26434 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
26435 case V4HI_FTYPE_V4HI_SI_COUNT
:
26436 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
26437 case V2DI_FTYPE_V2DI_SI_COUNT
:
26438 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
26439 case V2SI_FTYPE_V2SI_SI_COUNT
:
26440 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
26441 case V1DI_FTYPE_V1DI_SI_COUNT
:
26443 last_arg_count
= true;
26445 case UINT64_FTYPE_UINT64_UINT64
:
26446 case UINT_FTYPE_UINT_UINT
:
26447 case UINT_FTYPE_UINT_USHORT
:
26448 case UINT_FTYPE_UINT_UCHAR
:
26449 case UINT16_FTYPE_UINT16_INT
:
26450 case UINT8_FTYPE_UINT8_INT
:
26453 case V2DI_FTYPE_V2DI_INT_CONVERT
:
26456 nargs_constant
= 1;
26458 case V8HI_FTYPE_V8HI_INT
:
26459 case V8HI_FTYPE_V8SF_INT
:
26460 case V8HI_FTYPE_V4SF_INT
:
26461 case V8SF_FTYPE_V8SF_INT
:
26462 case V4SI_FTYPE_V4SI_INT
:
26463 case V4SI_FTYPE_V8SI_INT
:
26464 case V4HI_FTYPE_V4HI_INT
:
26465 case V4DF_FTYPE_V4DF_INT
:
26466 case V4SF_FTYPE_V4SF_INT
:
26467 case V4SF_FTYPE_V8SF_INT
:
26468 case V2DI_FTYPE_V2DI_INT
:
26469 case V2DF_FTYPE_V2DF_INT
:
26470 case V2DF_FTYPE_V4DF_INT
:
26472 nargs_constant
= 1;
26474 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
26475 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
26476 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
26477 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
26478 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
26481 case V16QI_FTYPE_V16QI_V16QI_INT
:
26482 case V8HI_FTYPE_V8HI_V8HI_INT
:
26483 case V8SI_FTYPE_V8SI_V8SI_INT
:
26484 case V8SI_FTYPE_V8SI_V4SI_INT
:
26485 case V8SF_FTYPE_V8SF_V8SF_INT
:
26486 case V8SF_FTYPE_V8SF_V4SF_INT
:
26487 case V4SI_FTYPE_V4SI_V4SI_INT
:
26488 case V4DF_FTYPE_V4DF_V4DF_INT
:
26489 case V4DF_FTYPE_V4DF_V2DF_INT
:
26490 case V4SF_FTYPE_V4SF_V4SF_INT
:
26491 case V2DI_FTYPE_V2DI_V2DI_INT
:
26492 case V2DF_FTYPE_V2DF_V2DF_INT
:
26494 nargs_constant
= 1;
26496 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
26499 nargs_constant
= 1;
26501 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
26504 nargs_constant
= 1;
26506 case V2DI_FTYPE_V2DI_UINT_UINT
:
26508 nargs_constant
= 2;
26510 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
26511 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
26512 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
26513 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
26515 nargs_constant
= 1;
26517 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
26519 nargs_constant
= 2;
26522 gcc_unreachable ();
26525 gcc_assert (nargs
<= ARRAY_SIZE (args
));
26527 if (comparison
!= UNKNOWN
)
26529 gcc_assert (nargs
== 2);
26530 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
26533 if (rmode
== VOIDmode
|| rmode
== tmode
)
26537 || GET_MODE (target
) != tmode
26538 || !insn_p
->operand
[0].predicate (target
, tmode
))
26539 target
= gen_reg_rtx (tmode
);
26540 real_target
= target
;
26544 target
= gen_reg_rtx (rmode
);
26545 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
26548 for (i
= 0; i
< nargs
; i
++)
26550 tree arg
= CALL_EXPR_ARG (exp
, i
);
26551 rtx op
= expand_normal (arg
);
26552 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
26553 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
26555 if (last_arg_count
&& (i
+ 1) == nargs
)
26557 /* SIMD shift insns take either an 8-bit immediate or
26558 register as count. But builtin functions take int as
26559 count. If count doesn't match, we put it in register. */
26562 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
26563 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
26564 op
= copy_to_reg (op
);
26567 else if ((nargs
- i
) <= nargs_constant
)
26572 case CODE_FOR_sse4_1_roundpd
:
26573 case CODE_FOR_sse4_1_roundps
:
26574 case CODE_FOR_sse4_1_roundsd
:
26575 case CODE_FOR_sse4_1_roundss
:
26576 case CODE_FOR_sse4_1_blendps
:
26577 case CODE_FOR_avx_blendpd256
:
26578 case CODE_FOR_avx_vpermilv4df
:
26579 case CODE_FOR_avx_roundpd256
:
26580 case CODE_FOR_avx_roundps256
:
26581 error ("the last argument must be a 4-bit immediate");
26584 case CODE_FOR_sse4_1_blendpd
:
26585 case CODE_FOR_avx_vpermilv2df
:
26586 case CODE_FOR_xop_vpermil2v2df3
:
26587 case CODE_FOR_xop_vpermil2v4sf3
:
26588 case CODE_FOR_xop_vpermil2v4df3
:
26589 case CODE_FOR_xop_vpermil2v8sf3
:
26590 error ("the last argument must be a 2-bit immediate");
26593 case CODE_FOR_avx_vextractf128v4df
:
26594 case CODE_FOR_avx_vextractf128v8sf
:
26595 case CODE_FOR_avx_vextractf128v8si
:
26596 case CODE_FOR_avx_vinsertf128v4df
:
26597 case CODE_FOR_avx_vinsertf128v8sf
:
26598 case CODE_FOR_avx_vinsertf128v8si
:
26599 error ("the last argument must be a 1-bit immediate");
26602 case CODE_FOR_avx_cmpsdv2df3
:
26603 case CODE_FOR_avx_cmpssv4sf3
:
26604 case CODE_FOR_avx_cmppdv2df3
:
26605 case CODE_FOR_avx_cmppsv4sf3
:
26606 case CODE_FOR_avx_cmppdv4df3
:
26607 case CODE_FOR_avx_cmppsv8sf3
:
26608 error ("the last argument must be a 5-bit immediate");
26612 switch (nargs_constant
)
26615 if ((nargs
- i
) == nargs_constant
)
26617 error ("the next to last argument must be an 8-bit immediate");
26621 error ("the last argument must be an 8-bit immediate");
26624 gcc_unreachable ();
26631 if (VECTOR_MODE_P (mode
))
26632 op
= safe_vector_operand (op
, mode
);
26634 /* If we aren't optimizing, only allow one memory operand to
26636 if (memory_operand (op
, mode
))
26639 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
26641 if (optimize
|| !match
|| num_memory
> 1)
26642 op
= copy_to_mode_reg (mode
, op
);
26646 op
= copy_to_reg (op
);
26647 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
26652 args
[i
].mode
= mode
;
26658 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
26661 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
26664 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
26668 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
26669 args
[2].op
, args
[3].op
);
26672 gcc_unreachable ();
26682 /* Subroutine of ix86_expand_builtin to take care of special insns
26683 with variable number of operands. */
26686 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
26687 tree exp
, rtx target
)
26691 unsigned int i
, nargs
, arg_adjust
, memory
;
26695 enum machine_mode mode
;
26697 enum insn_code icode
= d
->icode
;
26698 bool last_arg_constant
= false;
26699 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
26700 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
26701 enum { load
, store
} klass
;
26703 switch ((enum ix86_builtin_func_type
) d
->flag
)
26705 case VOID_FTYPE_VOID
:
26706 if (icode
== CODE_FOR_avx_vzeroupper
)
26707 target
= GEN_INT (vzeroupper_intrinsic
);
26708 emit_insn (GEN_FCN (icode
) (target
));
26710 case VOID_FTYPE_UINT64
:
26711 case VOID_FTYPE_UNSIGNED
:
26717 case UINT64_FTYPE_VOID
:
26718 case UNSIGNED_FTYPE_VOID
:
26723 case UINT64_FTYPE_PUNSIGNED
:
26724 case V2DI_FTYPE_PV2DI
:
26725 case V32QI_FTYPE_PCCHAR
:
26726 case V16QI_FTYPE_PCCHAR
:
26727 case V8SF_FTYPE_PCV4SF
:
26728 case V8SF_FTYPE_PCFLOAT
:
26729 case V4SF_FTYPE_PCFLOAT
:
26730 case V4DF_FTYPE_PCV2DF
:
26731 case V4DF_FTYPE_PCDOUBLE
:
26732 case V2DF_FTYPE_PCDOUBLE
:
26733 case VOID_FTYPE_PVOID
:
26738 case VOID_FTYPE_PV2SF_V4SF
:
26739 case VOID_FTYPE_PV4DI_V4DI
:
26740 case VOID_FTYPE_PV2DI_V2DI
:
26741 case VOID_FTYPE_PCHAR_V32QI
:
26742 case VOID_FTYPE_PCHAR_V16QI
:
26743 case VOID_FTYPE_PFLOAT_V8SF
:
26744 case VOID_FTYPE_PFLOAT_V4SF
:
26745 case VOID_FTYPE_PDOUBLE_V4DF
:
26746 case VOID_FTYPE_PDOUBLE_V2DF
:
26747 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
26748 case VOID_FTYPE_PINT_INT
:
26751 /* Reserve memory operand for target. */
26752 memory
= ARRAY_SIZE (args
);
26754 case V4SF_FTYPE_V4SF_PCV2SF
:
26755 case V2DF_FTYPE_V2DF_PCDOUBLE
:
26760 case V8SF_FTYPE_PCV8SF_V8SF
:
26761 case V4DF_FTYPE_PCV4DF_V4DF
:
26762 case V4SF_FTYPE_PCV4SF_V4SF
:
26763 case V2DF_FTYPE_PCV2DF_V2DF
:
26768 case VOID_FTYPE_PV8SF_V8SF_V8SF
:
26769 case VOID_FTYPE_PV4DF_V4DF_V4DF
:
26770 case VOID_FTYPE_PV4SF_V4SF_V4SF
:
26771 case VOID_FTYPE_PV2DF_V2DF_V2DF
:
26774 /* Reserve memory operand for target. */
26775 memory
= ARRAY_SIZE (args
);
26777 case VOID_FTYPE_UINT_UINT_UINT
:
26778 case VOID_FTYPE_UINT64_UINT_UINT
:
26779 case UCHAR_FTYPE_UINT_UINT_UINT
:
26780 case UCHAR_FTYPE_UINT64_UINT_UINT
:
26783 memory
= ARRAY_SIZE (args
);
26784 last_arg_constant
= true;
26787 gcc_unreachable ();
26790 gcc_assert (nargs
<= ARRAY_SIZE (args
));
26792 if (klass
== store
)
26794 arg
= CALL_EXPR_ARG (exp
, 0);
26795 op
= expand_normal (arg
);
26796 gcc_assert (target
== 0);
26798 target
= gen_rtx_MEM (tmode
, copy_to_mode_reg (Pmode
, op
));
26800 target
= force_reg (tmode
, op
);
26808 || GET_MODE (target
) != tmode
26809 || !insn_p
->operand
[0].predicate (target
, tmode
))
26810 target
= gen_reg_rtx (tmode
);
26813 for (i
= 0; i
< nargs
; i
++)
26815 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
26818 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
26819 op
= expand_normal (arg
);
26820 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
26822 if (last_arg_constant
&& (i
+ 1) == nargs
)
26826 if (icode
== CODE_FOR_lwp_lwpvalsi3
26827 || icode
== CODE_FOR_lwp_lwpinssi3
26828 || icode
== CODE_FOR_lwp_lwpvaldi3
26829 || icode
== CODE_FOR_lwp_lwpinsdi3
)
26830 error ("the last argument must be a 32-bit immediate");
26832 error ("the last argument must be an 8-bit immediate");
26840 /* This must be the memory operand. */
26841 op
= gen_rtx_MEM (mode
, copy_to_mode_reg (Pmode
, op
));
26842 gcc_assert (GET_MODE (op
) == mode
26843 || GET_MODE (op
) == VOIDmode
);
26847 /* This must be register. */
26848 if (VECTOR_MODE_P (mode
))
26849 op
= safe_vector_operand (op
, mode
);
26851 gcc_assert (GET_MODE (op
) == mode
26852 || GET_MODE (op
) == VOIDmode
);
26853 op
= copy_to_mode_reg (mode
, op
);
26858 args
[i
].mode
= mode
;
26864 pat
= GEN_FCN (icode
) (target
);
26867 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
26870 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
26873 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
26876 gcc_unreachable ();
26882 return klass
== store
? 0 : target
;
26885 /* Return the integer constant in ARG. Constrain it to be in the range
26886 of the subparts of VEC_TYPE; issue an error if not. */
26889 get_element_number (tree vec_type
, tree arg
)
26891 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
26893 if (!host_integerp (arg
, 1)
26894 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
26896 error ("selector must be an integer constant in the range 0..%wi", max
);
26903 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26904 ix86_expand_vector_init. We DO have language-level syntax for this, in
26905 the form of (type){ init-list }. Except that since we can't place emms
26906 instructions from inside the compiler, we can't allow the use of MMX
26907 registers unless the user explicitly asks for it. So we do *not* define
26908 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
26909 we have builtins invoked by mmintrin.h that gives us license to emit
26910 these sorts of instructions. */
26913 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
26915 enum machine_mode tmode
= TYPE_MODE (type
);
26916 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
26917 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
26918 rtvec v
= rtvec_alloc (n_elt
);
26920 gcc_assert (VECTOR_MODE_P (tmode
));
26921 gcc_assert (call_expr_nargs (exp
) == n_elt
);
26923 for (i
= 0; i
< n_elt
; ++i
)
26925 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
26926 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
26929 if (!target
|| !register_operand (target
, tmode
))
26930 target
= gen_reg_rtx (tmode
);
26932 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
26936 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26937 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
26938 had a language-level syntax for referencing vector elements. */
26941 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
26943 enum machine_mode tmode
, mode0
;
26948 arg0
= CALL_EXPR_ARG (exp
, 0);
26949 arg1
= CALL_EXPR_ARG (exp
, 1);
26951 op0
= expand_normal (arg0
);
26952 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
26954 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
26955 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
26956 gcc_assert (VECTOR_MODE_P (mode0
));
26958 op0
= force_reg (mode0
, op0
);
26960 if (optimize
|| !target
|| !register_operand (target
, tmode
))
26961 target
= gen_reg_rtx (tmode
);
26963 ix86_expand_vector_extract (true, target
, op0
, elt
);
26968 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26969 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
26970 a language-level syntax for referencing vector elements. */
26973 ix86_expand_vec_set_builtin (tree exp
)
26975 enum machine_mode tmode
, mode1
;
26976 tree arg0
, arg1
, arg2
;
26978 rtx op0
, op1
, target
;
26980 arg0
= CALL_EXPR_ARG (exp
, 0);
26981 arg1
= CALL_EXPR_ARG (exp
, 1);
26982 arg2
= CALL_EXPR_ARG (exp
, 2);
26984 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
26985 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
26986 gcc_assert (VECTOR_MODE_P (tmode
));
26988 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
26989 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
26990 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
26992 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
26993 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
26995 op0
= force_reg (tmode
, op0
);
26996 op1
= force_reg (mode1
, op1
);
26998 /* OP0 is the source of these builtin functions and shouldn't be
26999 modified. Create a copy, use it and return it as target. */
27000 target
= gen_reg_rtx (tmode
);
27001 emit_move_insn (target
, op0
);
27002 ix86_expand_vector_set (true, target
, op1
, elt
);
27007 /* Expand an expression EXP that calls a built-in function,
27008 with result going to TARGET if that's convenient
27009 (and in mode MODE if that's convenient).
27010 SUBTARGET may be used as the target for computing one of EXP's operands.
27011 IGNORE is nonzero if the value is to be ignored. */
27014 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
27015 enum machine_mode mode ATTRIBUTE_UNUSED
,
27016 int ignore ATTRIBUTE_UNUSED
)
27018 const struct builtin_description
*d
;
27020 enum insn_code icode
;
27021 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
27022 tree arg0
, arg1
, arg2
;
27023 rtx op0
, op1
, op2
, pat
;
27024 enum machine_mode mode0
, mode1
, mode2
;
27025 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
27027 /* Determine whether the builtin function is available under the current ISA.
27028 Originally the builtin was not created if it wasn't applicable to the
27029 current ISA based on the command line switches. With function specific
27030 options, we need to check in the context of the function making the call
27031 whether it is supported. */
27032 if (ix86_builtins_isa
[fcode
].isa
27033 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
27035 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
27036 NULL
, NULL
, false);
27039 error ("%qE needs unknown isa option", fndecl
);
27042 gcc_assert (opts
!= NULL
);
27043 error ("%qE needs isa option %s", fndecl
, opts
);
27051 case IX86_BUILTIN_MASKMOVQ
:
27052 case IX86_BUILTIN_MASKMOVDQU
:
27053 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
27054 ? CODE_FOR_mmx_maskmovq
27055 : CODE_FOR_sse2_maskmovdqu
);
27056 /* Note the arg order is different from the operand order. */
27057 arg1
= CALL_EXPR_ARG (exp
, 0);
27058 arg2
= CALL_EXPR_ARG (exp
, 1);
27059 arg0
= CALL_EXPR_ARG (exp
, 2);
27060 op0
= expand_normal (arg0
);
27061 op1
= expand_normal (arg1
);
27062 op2
= expand_normal (arg2
);
27063 mode0
= insn_data
[icode
].operand
[0].mode
;
27064 mode1
= insn_data
[icode
].operand
[1].mode
;
27065 mode2
= insn_data
[icode
].operand
[2].mode
;
27067 op0
= force_reg (Pmode
, op0
);
27068 op0
= gen_rtx_MEM (mode1
, op0
);
27070 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
27071 op0
= copy_to_mode_reg (mode0
, op0
);
27072 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
27073 op1
= copy_to_mode_reg (mode1
, op1
);
27074 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
27075 op2
= copy_to_mode_reg (mode2
, op2
);
27076 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
27082 case IX86_BUILTIN_LDMXCSR
:
27083 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
27084 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
27085 emit_move_insn (target
, op0
);
27086 emit_insn (gen_sse_ldmxcsr (target
));
27089 case IX86_BUILTIN_STMXCSR
:
27090 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
27091 emit_insn (gen_sse_stmxcsr (target
));
27092 return copy_to_mode_reg (SImode
, target
);
27094 case IX86_BUILTIN_CLFLUSH
:
27095 arg0
= CALL_EXPR_ARG (exp
, 0);
27096 op0
= expand_normal (arg0
);
27097 icode
= CODE_FOR_sse2_clflush
;
27098 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
27099 op0
= copy_to_mode_reg (Pmode
, op0
);
27101 emit_insn (gen_sse2_clflush (op0
));
27104 case IX86_BUILTIN_MONITOR
:
27105 arg0
= CALL_EXPR_ARG (exp
, 0);
27106 arg1
= CALL_EXPR_ARG (exp
, 1);
27107 arg2
= CALL_EXPR_ARG (exp
, 2);
27108 op0
= expand_normal (arg0
);
27109 op1
= expand_normal (arg1
);
27110 op2
= expand_normal (arg2
);
27112 op0
= copy_to_mode_reg (Pmode
, op0
);
27114 op1
= copy_to_mode_reg (SImode
, op1
);
27116 op2
= copy_to_mode_reg (SImode
, op2
);
27117 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
27120 case IX86_BUILTIN_MWAIT
:
27121 arg0
= CALL_EXPR_ARG (exp
, 0);
27122 arg1
= CALL_EXPR_ARG (exp
, 1);
27123 op0
= expand_normal (arg0
);
27124 op1
= expand_normal (arg1
);
27126 op0
= copy_to_mode_reg (SImode
, op0
);
27128 op1
= copy_to_mode_reg (SImode
, op1
);
27129 emit_insn (gen_sse3_mwait (op0
, op1
));
27132 case IX86_BUILTIN_VEC_INIT_V2SI
:
27133 case IX86_BUILTIN_VEC_INIT_V4HI
:
27134 case IX86_BUILTIN_VEC_INIT_V8QI
:
27135 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
27137 case IX86_BUILTIN_VEC_EXT_V2DF
:
27138 case IX86_BUILTIN_VEC_EXT_V2DI
:
27139 case IX86_BUILTIN_VEC_EXT_V4SF
:
27140 case IX86_BUILTIN_VEC_EXT_V4SI
:
27141 case IX86_BUILTIN_VEC_EXT_V8HI
:
27142 case IX86_BUILTIN_VEC_EXT_V2SI
:
27143 case IX86_BUILTIN_VEC_EXT_V4HI
:
27144 case IX86_BUILTIN_VEC_EXT_V16QI
:
27145 return ix86_expand_vec_ext_builtin (exp
, target
);
27147 case IX86_BUILTIN_VEC_SET_V2DI
:
27148 case IX86_BUILTIN_VEC_SET_V4SF
:
27149 case IX86_BUILTIN_VEC_SET_V4SI
:
27150 case IX86_BUILTIN_VEC_SET_V8HI
:
27151 case IX86_BUILTIN_VEC_SET_V4HI
:
27152 case IX86_BUILTIN_VEC_SET_V16QI
:
27153 return ix86_expand_vec_set_builtin (exp
);
27155 case IX86_BUILTIN_VEC_PERM_V2DF
:
27156 case IX86_BUILTIN_VEC_PERM_V4SF
:
27157 case IX86_BUILTIN_VEC_PERM_V2DI
:
27158 case IX86_BUILTIN_VEC_PERM_V4SI
:
27159 case IX86_BUILTIN_VEC_PERM_V8HI
:
27160 case IX86_BUILTIN_VEC_PERM_V16QI
:
27161 case IX86_BUILTIN_VEC_PERM_V2DI_U
:
27162 case IX86_BUILTIN_VEC_PERM_V4SI_U
:
27163 case IX86_BUILTIN_VEC_PERM_V8HI_U
:
27164 case IX86_BUILTIN_VEC_PERM_V16QI_U
:
27165 case IX86_BUILTIN_VEC_PERM_V4DF
:
27166 case IX86_BUILTIN_VEC_PERM_V8SF
:
27167 return ix86_expand_vec_perm_builtin (exp
);
27169 case IX86_BUILTIN_INFQ
:
27170 case IX86_BUILTIN_HUGE_VALQ
:
27172 REAL_VALUE_TYPE inf
;
27176 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
27178 tmp
= validize_mem (force_const_mem (mode
, tmp
));
27181 target
= gen_reg_rtx (mode
);
27183 emit_move_insn (target
, tmp
);
27187 case IX86_BUILTIN_LLWPCB
:
27188 arg0
= CALL_EXPR_ARG (exp
, 0);
27189 op0
= expand_normal (arg0
);
27190 icode
= CODE_FOR_lwp_llwpcb
;
27191 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
27192 op0
= copy_to_mode_reg (Pmode
, op0
);
27193 emit_insn (gen_lwp_llwpcb (op0
));
27196 case IX86_BUILTIN_SLWPCB
:
27197 icode
= CODE_FOR_lwp_slwpcb
;
27199 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
27200 target
= gen_reg_rtx (Pmode
);
27201 emit_insn (gen_lwp_slwpcb (target
));
27204 case IX86_BUILTIN_BEXTRI32
:
27205 case IX86_BUILTIN_BEXTRI64
:
27206 arg0
= CALL_EXPR_ARG (exp
, 0);
27207 arg1
= CALL_EXPR_ARG (exp
, 1);
27208 op0
= expand_normal (arg0
);
27209 op1
= expand_normal (arg1
);
27210 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
27211 ? CODE_FOR_tbm_bextri_si
27212 : CODE_FOR_tbm_bextri_di
);
27213 if (!CONST_INT_P (op1
))
27215 error ("last argument must be an immediate");
27220 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
27221 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
27222 op1
= GEN_INT (length
);
27223 op2
= GEN_INT (lsb_index
);
27224 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
27230 case IX86_BUILTIN_RDRAND16_STEP
:
27231 icode
= CODE_FOR_rdrandhi_1
;
27235 case IX86_BUILTIN_RDRAND32_STEP
:
27236 icode
= CODE_FOR_rdrandsi_1
;
27240 case IX86_BUILTIN_RDRAND64_STEP
:
27241 icode
= CODE_FOR_rdranddi_1
;
27245 op0
= gen_reg_rtx (mode0
);
27246 emit_insn (GEN_FCN (icode
) (op0
));
27248 op1
= gen_reg_rtx (SImode
);
27249 emit_move_insn (op1
, CONST1_RTX (SImode
));
27251 /* Emit SImode conditional move. */
27252 if (mode0
== HImode
)
27254 op2
= gen_reg_rtx (SImode
);
27255 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
27257 else if (mode0
== SImode
)
27260 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
27262 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
27264 emit_insn (gen_rtx_SET (VOIDmode
, op1
,
27265 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
27266 emit_move_insn (target
, op1
);
27268 arg0
= CALL_EXPR_ARG (exp
, 0);
27269 op1
= expand_normal (arg0
);
27270 if (!address_operand (op1
, VOIDmode
))
27271 op1
= copy_addr_to_reg (op1
);
27272 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
27279 for (i
= 0, d
= bdesc_special_args
;
27280 i
< ARRAY_SIZE (bdesc_special_args
);
27282 if (d
->code
== fcode
)
27283 return ix86_expand_special_args_builtin (d
, exp
, target
);
27285 for (i
= 0, d
= bdesc_args
;
27286 i
< ARRAY_SIZE (bdesc_args
);
27288 if (d
->code
== fcode
)
27291 case IX86_BUILTIN_FABSQ
:
27292 case IX86_BUILTIN_COPYSIGNQ
:
27294 /* Emit a normal call if SSE2 isn't available. */
27295 return expand_call (exp
, target
, ignore
);
27297 return ix86_expand_args_builtin (d
, exp
, target
);
27300 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
27301 if (d
->code
== fcode
)
27302 return ix86_expand_sse_comi (d
, exp
, target
);
27304 for (i
= 0, d
= bdesc_pcmpestr
;
27305 i
< ARRAY_SIZE (bdesc_pcmpestr
);
27307 if (d
->code
== fcode
)
27308 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
27310 for (i
= 0, d
= bdesc_pcmpistr
;
27311 i
< ARRAY_SIZE (bdesc_pcmpistr
);
27313 if (d
->code
== fcode
)
27314 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
27316 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
27317 if (d
->code
== fcode
)
27318 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
27319 (enum ix86_builtin_func_type
)
27320 d
->flag
, d
->comparison
);
27322 gcc_unreachable ();
27325 /* Returns a function decl for a vectorized version of the builtin function
27326 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27327 if it is not available. */
27330 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
27333 enum machine_mode in_mode
, out_mode
;
27335 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
27337 if (TREE_CODE (type_out
) != VECTOR_TYPE
27338 || TREE_CODE (type_in
) != VECTOR_TYPE
27339 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
27342 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
27343 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
27344 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
27345 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
27349 case BUILT_IN_SQRT
:
27350 if (out_mode
== DFmode
&& in_mode
== DFmode
)
27352 if (out_n
== 2 && in_n
== 2)
27353 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
27354 else if (out_n
== 4 && in_n
== 4)
27355 return ix86_builtins
[IX86_BUILTIN_SQRTPD256
];
27359 case BUILT_IN_SQRTF
:
27360 if (out_mode
== SFmode
&& in_mode
== SFmode
)
27362 if (out_n
== 4 && in_n
== 4)
27363 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
27364 else if (out_n
== 8 && in_n
== 8)
27365 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR256
];
27369 case BUILT_IN_LRINT
:
27370 if (out_mode
== SImode
&& out_n
== 4
27371 && in_mode
== DFmode
&& in_n
== 2)
27372 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
27375 case BUILT_IN_LRINTF
:
27376 if (out_mode
== SImode
&& in_mode
== SFmode
)
27378 if (out_n
== 4 && in_n
== 4)
27379 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
27380 else if (out_n
== 8 && in_n
== 8)
27381 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ256
];
27385 case BUILT_IN_COPYSIGN
:
27386 if (out_mode
== DFmode
&& in_mode
== DFmode
)
27388 if (out_n
== 2 && in_n
== 2)
27389 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD
];
27390 else if (out_n
== 4 && in_n
== 4)
27391 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD256
];
27395 case BUILT_IN_COPYSIGNF
:
27396 if (out_mode
== SFmode
&& in_mode
== SFmode
)
27398 if (out_n
== 4 && in_n
== 4)
27399 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS
];
27400 else if (out_n
== 8 && in_n
== 8)
27401 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS256
];
27406 if (out_mode
== DFmode
&& in_mode
== DFmode
)
27408 if (out_n
== 2 && in_n
== 2)
27409 return ix86_builtins
[IX86_BUILTIN_VFMADDPD
];
27410 if (out_n
== 4 && in_n
== 4)
27411 return ix86_builtins
[IX86_BUILTIN_VFMADDPD256
];
27415 case BUILT_IN_FMAF
:
27416 if (out_mode
== SFmode
&& in_mode
== SFmode
)
27418 if (out_n
== 4 && in_n
== 4)
27419 return ix86_builtins
[IX86_BUILTIN_VFMADDPS
];
27420 if (out_n
== 8 && in_n
== 8)
27421 return ix86_builtins
[IX86_BUILTIN_VFMADDPS256
];
27429 /* Dispatch to a handler for a vectorization library. */
27430 if (ix86_veclib_handler
)
27431 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
27437 /* Handler for an SVML-style interface to
27438 a library with vectorized intrinsics. */
27441 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
27444 tree fntype
, new_fndecl
, args
;
27447 enum machine_mode el_mode
, in_mode
;
27450 /* The SVML is suitable for unsafe math only. */
27451 if (!flag_unsafe_math_optimizations
)
27454 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
27455 n
= TYPE_VECTOR_SUBPARTS (type_out
);
27456 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
27457 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
27458 if (el_mode
!= in_mode
27466 case BUILT_IN_LOG10
:
27468 case BUILT_IN_TANH
:
27470 case BUILT_IN_ATAN
:
27471 case BUILT_IN_ATAN2
:
27472 case BUILT_IN_ATANH
:
27473 case BUILT_IN_CBRT
:
27474 case BUILT_IN_SINH
:
27476 case BUILT_IN_ASINH
:
27477 case BUILT_IN_ASIN
:
27478 case BUILT_IN_COSH
:
27480 case BUILT_IN_ACOSH
:
27481 case BUILT_IN_ACOS
:
27482 if (el_mode
!= DFmode
|| n
!= 2)
27486 case BUILT_IN_EXPF
:
27487 case BUILT_IN_LOGF
:
27488 case BUILT_IN_LOG10F
:
27489 case BUILT_IN_POWF
:
27490 case BUILT_IN_TANHF
:
27491 case BUILT_IN_TANF
:
27492 case BUILT_IN_ATANF
:
27493 case BUILT_IN_ATAN2F
:
27494 case BUILT_IN_ATANHF
:
27495 case BUILT_IN_CBRTF
:
27496 case BUILT_IN_SINHF
:
27497 case BUILT_IN_SINF
:
27498 case BUILT_IN_ASINHF
:
27499 case BUILT_IN_ASINF
:
27500 case BUILT_IN_COSHF
:
27501 case BUILT_IN_COSF
:
27502 case BUILT_IN_ACOSHF
:
27503 case BUILT_IN_ACOSF
:
27504 if (el_mode
!= SFmode
|| n
!= 4)
27512 bname
= IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls
[fn
]));
27514 if (fn
== BUILT_IN_LOGF
)
27515 strcpy (name
, "vmlsLn4");
27516 else if (fn
== BUILT_IN_LOG
)
27517 strcpy (name
, "vmldLn2");
27520 sprintf (name
, "vmls%s", bname
+10);
27521 name
[strlen (name
)-1] = '4';
27524 sprintf (name
, "vmld%s2", bname
+10);
27526 /* Convert to uppercase. */
27530 for (args
= DECL_ARGUMENTS (implicit_built_in_decls
[fn
]); args
;
27531 args
= TREE_CHAIN (args
))
27535 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
27537 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
27539 /* Build a function declaration for the vectorized function. */
27540 new_fndecl
= build_decl (BUILTINS_LOCATION
,
27541 FUNCTION_DECL
, get_identifier (name
), fntype
);
27542 TREE_PUBLIC (new_fndecl
) = 1;
27543 DECL_EXTERNAL (new_fndecl
) = 1;
27544 DECL_IS_NOVOPS (new_fndecl
) = 1;
27545 TREE_READONLY (new_fndecl
) = 1;
27550 /* Handler for an ACML-style interface to
27551 a library with vectorized intrinsics. */
27554 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
27556 char name
[20] = "__vr.._";
27557 tree fntype
, new_fndecl
, args
;
27560 enum machine_mode el_mode
, in_mode
;
27563 /* The ACML is 64bits only and suitable for unsafe math only as
27564 it does not correctly support parts of IEEE with the required
27565 precision such as denormals. */
27567 || !flag_unsafe_math_optimizations
)
27570 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
27571 n
= TYPE_VECTOR_SUBPARTS (type_out
);
27572 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
27573 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
27574 if (el_mode
!= in_mode
27584 case BUILT_IN_LOG2
:
27585 case BUILT_IN_LOG10
:
27588 if (el_mode
!= DFmode
27593 case BUILT_IN_SINF
:
27594 case BUILT_IN_COSF
:
27595 case BUILT_IN_EXPF
:
27596 case BUILT_IN_POWF
:
27597 case BUILT_IN_LOGF
:
27598 case BUILT_IN_LOG2F
:
27599 case BUILT_IN_LOG10F
:
27602 if (el_mode
!= SFmode
27611 bname
= IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls
[fn
]));
27612 sprintf (name
+ 7, "%s", bname
+10);
27615 for (args
= DECL_ARGUMENTS (implicit_built_in_decls
[fn
]); args
;
27616 args
= TREE_CHAIN (args
))
27620 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
27622 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
27624 /* Build a function declaration for the vectorized function. */
27625 new_fndecl
= build_decl (BUILTINS_LOCATION
,
27626 FUNCTION_DECL
, get_identifier (name
), fntype
);
27627 TREE_PUBLIC (new_fndecl
) = 1;
27628 DECL_EXTERNAL (new_fndecl
) = 1;
27629 DECL_IS_NOVOPS (new_fndecl
) = 1;
27630 TREE_READONLY (new_fndecl
) = 1;
27636 /* Returns a decl of a function that implements conversion of an integer vector
27637 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
27638 are the types involved when converting according to CODE.
27639 Return NULL_TREE if it is not available. */
27642 ix86_vectorize_builtin_conversion (unsigned int code
,
27643 tree dest_type
, tree src_type
)
27651 switch (TYPE_MODE (src_type
))
27654 switch (TYPE_MODE (dest_type
))
27657 return (TYPE_UNSIGNED (src_type
)
27658 ? ix86_builtins
[IX86_BUILTIN_CVTUDQ2PS
]
27659 : ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
]);
27661 return (TYPE_UNSIGNED (src_type
)
27663 : ix86_builtins
[IX86_BUILTIN_CVTDQ2PD256
]);
27669 switch (TYPE_MODE (dest_type
))
27672 return (TYPE_UNSIGNED (src_type
)
27674 : ix86_builtins
[IX86_BUILTIN_CVTDQ2PS256
]);
27683 case FIX_TRUNC_EXPR
:
27684 switch (TYPE_MODE (dest_type
))
27687 switch (TYPE_MODE (src_type
))
27690 return (TYPE_UNSIGNED (dest_type
)
27692 : ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
]);
27694 return (TYPE_UNSIGNED (dest_type
)
27696 : ix86_builtins
[IX86_BUILTIN_CVTTPD2DQ256
]);
27703 switch (TYPE_MODE (src_type
))
27706 return (TYPE_UNSIGNED (dest_type
)
27708 : ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ256
]);
27725 /* Returns a code for a target-specific builtin that implements
27726 reciprocal of the function, or NULL_TREE if not available. */
27729 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
27730 bool sqrt ATTRIBUTE_UNUSED
)
27732 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
27733 && flag_finite_math_only
&& !flag_trapping_math
27734 && flag_unsafe_math_optimizations
))
27738 /* Machine dependent builtins. */
27741 /* Vectorized version of sqrt to rsqrt conversion. */
27742 case IX86_BUILTIN_SQRTPS_NR
:
27743 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
27745 case IX86_BUILTIN_SQRTPS_NR256
:
27746 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR256
];
27752 /* Normal builtins. */
27755 /* Sqrt to rsqrt conversion. */
27756 case BUILT_IN_SQRTF
:
27757 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
27764 /* Helper for avx_vpermilps256_operand et al. This is also used by
27765 the expansion functions to turn the parallel back into a mask.
27766 The return value is 0 for no match and the imm8+1 for a match. */
27769 avx_vpermilp_parallel (rtx par
, enum machine_mode mode
)
27771 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
27773 unsigned char ipar
[8];
27775 if (XVECLEN (par
, 0) != (int) nelt
)
27778 /* Validate that all of the elements are constants, and not totally
27779 out of range. Copy the data into an integral array to make the
27780 subsequent checks easier. */
27781 for (i
= 0; i
< nelt
; ++i
)
27783 rtx er
= XVECEXP (par
, 0, i
);
27784 unsigned HOST_WIDE_INT ei
;
27786 if (!CONST_INT_P (er
))
27797 /* In the 256-bit DFmode case, we can only move elements within
27799 for (i
= 0; i
< 2; ++i
)
27803 mask
|= ipar
[i
] << i
;
27805 for (i
= 2; i
< 4; ++i
)
27809 mask
|= (ipar
[i
] - 2) << i
;
27814 /* In the 256-bit SFmode case, we have full freedom of movement
27815 within the low 128-bit lane, but the high 128-bit lane must
27816 mirror the exact same pattern. */
27817 for (i
= 0; i
< 4; ++i
)
27818 if (ipar
[i
] + 4 != ipar
[i
+ 4])
27825 /* In the 128-bit case, we've full freedom in the placement of
27826 the elements from the source operand. */
27827 for (i
= 0; i
< nelt
; ++i
)
27828 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
27832 gcc_unreachable ();
27835 /* Make sure success has a non-zero value by adding one. */
27839 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
27840 the expansion functions to turn the parallel back into a mask.
27841 The return value is 0 for no match and the imm8+1 for a match. */
27844 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
27846 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
27848 unsigned char ipar
[8];
27850 if (XVECLEN (par
, 0) != (int) nelt
)
27853 /* Validate that all of the elements are constants, and not totally
27854 out of range. Copy the data into an integral array to make the
27855 subsequent checks easier. */
27856 for (i
= 0; i
< nelt
; ++i
)
27858 rtx er
= XVECEXP (par
, 0, i
);
27859 unsigned HOST_WIDE_INT ei
;
27861 if (!CONST_INT_P (er
))
27864 if (ei
>= 2 * nelt
)
27869 /* Validate that the halves of the permute are halves. */
27870 for (i
= 0; i
< nelt2
- 1; ++i
)
27871 if (ipar
[i
] + 1 != ipar
[i
+ 1])
27873 for (i
= nelt2
; i
< nelt
- 1; ++i
)
27874 if (ipar
[i
] + 1 != ipar
[i
+ 1])
27877 /* Reconstruct the mask. */
27878 for (i
= 0; i
< 2; ++i
)
27880 unsigned e
= ipar
[i
* nelt2
];
27884 mask
|= e
<< (i
* 4);
27887 /* Make sure success has a non-zero value by adding one. */
27892 /* Store OPERAND to the memory after reload is completed. This means
27893 that we can't easily use assign_stack_local. */
27895 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
27899 gcc_assert (reload_completed
);
27900 if (ix86_using_red_zone ())
27902 result
= gen_rtx_MEM (mode
,
27903 gen_rtx_PLUS (Pmode
,
27905 GEN_INT (-RED_ZONE_SIZE
)));
27906 emit_move_insn (result
, operand
);
27908 else if (TARGET_64BIT
)
27914 operand
= gen_lowpart (DImode
, operand
);
27918 gen_rtx_SET (VOIDmode
,
27919 gen_rtx_MEM (DImode
,
27920 gen_rtx_PRE_DEC (DImode
,
27921 stack_pointer_rtx
)),
27925 gcc_unreachable ();
27927 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
27936 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
27938 gen_rtx_SET (VOIDmode
,
27939 gen_rtx_MEM (SImode
,
27940 gen_rtx_PRE_DEC (Pmode
,
27941 stack_pointer_rtx
)),
27944 gen_rtx_SET (VOIDmode
,
27945 gen_rtx_MEM (SImode
,
27946 gen_rtx_PRE_DEC (Pmode
,
27947 stack_pointer_rtx
)),
27952 /* Store HImodes as SImodes. */
27953 operand
= gen_lowpart (SImode
, operand
);
27957 gen_rtx_SET (VOIDmode
,
27958 gen_rtx_MEM (GET_MODE (operand
),
27959 gen_rtx_PRE_DEC (SImode
,
27960 stack_pointer_rtx
)),
27964 gcc_unreachable ();
27966 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
27971 /* Free operand from the memory. */
27973 ix86_free_from_memory (enum machine_mode mode
)
27975 if (!ix86_using_red_zone ())
27979 if (mode
== DImode
|| TARGET_64BIT
)
27983 /* Use LEA to deallocate stack space. In peephole2 it will be converted
27984 to pop or add instruction if registers are available. */
27985 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
27986 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
27991 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
27992 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
27994 static const reg_class_t
*
27995 i386_ira_cover_classes (void)
27997 static const reg_class_t sse_fpmath_classes
[] = {
27998 GENERAL_REGS
, SSE_REGS
, MMX_REGS
, FLOAT_REGS
, LIM_REG_CLASSES
28000 static const reg_class_t no_sse_fpmath_classes
[] = {
28001 GENERAL_REGS
, FLOAT_REGS
, MMX_REGS
, SSE_REGS
, LIM_REG_CLASSES
28004 return TARGET_SSE_MATH
? sse_fpmath_classes
: no_sse_fpmath_classes
;
28007 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28009 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28010 QImode must go into class Q_REGS.
28011 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
28012 movdf to do mem-to-mem moves through integer regs. */
28015 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
28017 enum machine_mode mode
= GET_MODE (x
);
28019 /* We're only allowed to return a subclass of CLASS. Many of the
28020 following checks fail for NO_REGS, so eliminate that early. */
28021 if (regclass
== NO_REGS
)
28024 /* All classes can load zeros. */
28025 if (x
== CONST0_RTX (mode
))
28028 /* Force constants into memory if we are loading a (nonzero) constant into
28029 an MMX or SSE register. This is because there are no MMX/SSE instructions
28030 to load from a constant. */
28032 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
28035 /* Prefer SSE regs only, if we can use them for math. */
28036 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
28037 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
28039 /* Floating-point constants need more complex checks. */
28040 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
28042 /* General regs can load everything. */
28043 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
28046 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28047 zero above. We only want to wind up preferring 80387 registers if
28048 we plan on doing computation with them. */
28050 && standard_80387_constant_p (x
))
28052 /* Limit class to non-sse. */
28053 if (regclass
== FLOAT_SSE_REGS
)
28055 if (regclass
== FP_TOP_SSE_REGS
)
28057 if (regclass
== FP_SECOND_SSE_REGS
)
28058 return FP_SECOND_REG
;
28059 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
28066 /* Generally when we see PLUS here, it's the function invariant
28067 (plus soft-fp const_int). Which can only be computed into general
28069 if (GET_CODE (x
) == PLUS
)
28070 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
28072 /* QImode constants are easy to load, but non-constant QImode data
28073 must go into Q_REGS. */
28074 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
28076 if (reg_class_subset_p (regclass
, Q_REGS
))
28078 if (reg_class_subset_p (Q_REGS
, regclass
))
28086 /* Discourage putting floating-point values in SSE registers unless
28087 SSE math is being used, and likewise for the 387 registers. */
28089 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
28091 enum machine_mode mode
= GET_MODE (x
);
28093 /* Restrict the output reload class to the register bank that we are doing
28094 math on. If we would like not to return a subset of CLASS, reject this
28095 alternative: if reload cannot do this, it will still use its choice. */
28096 mode
= GET_MODE (x
);
28097 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
28098 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
28100 if (X87_FLOAT_MODE_P (mode
))
28102 if (regclass
== FP_TOP_SSE_REGS
)
28104 else if (regclass
== FP_SECOND_SSE_REGS
)
28105 return FP_SECOND_REG
;
28107 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
28114 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
28115 enum machine_mode mode
,
28116 secondary_reload_info
*sri ATTRIBUTE_UNUSED
)
28118 /* QImode spills from non-QI registers require
28119 intermediate register on 32bit targets. */
28120 if (!in_p
&& mode
== QImode
&& !TARGET_64BIT
28121 && (rclass
== GENERAL_REGS
28122 || rclass
== LEGACY_REGS
28123 || rclass
== INDEX_REGS
))
28132 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
28133 regno
= true_regnum (x
);
28135 /* Return Q_REGS if the operand is in memory. */
28143 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28146 ix86_class_likely_spilled_p (reg_class_t rclass
)
28157 case SSE_FIRST_REG
:
28159 case FP_SECOND_REG
:
28169 /* If we are copying between general and FP registers, we need a memory
28170 location. The same is true for SSE and MMX registers.
28172 To optimize register_move_cost performance, allow inline variant.
28174 The macro can't work reliably when one of the CLASSES is class containing
28175 registers from multiple units (SSE, MMX, integer). We avoid this by never
28176 combining those units in single alternative in the machine description.
28177 Ensure that this constraint holds to avoid unexpected surprises.
28179 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28180 enforce these sanity checks. */
28183 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
28184 enum machine_mode mode
, int strict
)
28186 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
28187 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
28188 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
28189 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
28190 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
28191 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
28193 gcc_assert (!strict
);
28197 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
28200 /* ??? This is a lie. We do have moves between mmx/general, and for
28201 mmx/sse2. But by saying we need secondary memory we discourage the
28202 register allocator from using the mmx registers unless needed. */
28203 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
28206 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
28208 /* SSE1 doesn't have any direct moves from other classes. */
28212 /* If the target says that inter-unit moves are more expensive
28213 than moving through memory, then don't generate them. */
28214 if (!TARGET_INTER_UNIT_MOVES
)
28217 /* Between SSE and general, we have moves no larger than word size. */
28218 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
28226 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
28227 enum machine_mode mode
, int strict
)
28229 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
28232 /* Return true if the registers in CLASS cannot represent the change from
28233 modes FROM to TO. */
28236 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
28237 enum reg_class regclass
)
28242 /* x87 registers can't do subreg at all, as all values are reformatted
28243 to extended precision. */
28244 if (MAYBE_FLOAT_CLASS_P (regclass
))
28247 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
28249 /* Vector registers do not support QI or HImode loads. If we don't
28250 disallow a change to these modes, reload will assume it's ok to
28251 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28252 the vec_dupv4hi pattern. */
28253 if (GET_MODE_SIZE (from
) < 4)
28256 /* Vector registers do not support subreg with nonzero offsets, which
28257 are otherwise valid for integer registers. Since we can't see
28258 whether we have a nonzero offset from here, prohibit all
28259 nonparadoxical subregs changing size. */
28260 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
28267 /* Return the cost of moving data of mode M between a
28268 register and memory. A value of 2 is the default; this cost is
28269 relative to those in `REGISTER_MOVE_COST'.
28271 This function is used extensively by register_move_cost that is used to
28272 build tables at startup. Make it inline in this case.
28273 When IN is 2, return maximum of in and out move cost.
28275 If moving between registers and memory is more expensive than
28276 between two registers, you should define this macro to express the
28279 Model also increased moving costs of QImode registers in non
28283 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
28287 if (FLOAT_CLASS_P (regclass
))
28305 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
28306 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
28308 if (SSE_CLASS_P (regclass
))
28311 switch (GET_MODE_SIZE (mode
))
28326 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
28327 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
28329 if (MMX_CLASS_P (regclass
))
28332 switch (GET_MODE_SIZE (mode
))
28344 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
28345 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
28347 switch (GET_MODE_SIZE (mode
))
28350 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
28353 return ix86_cost
->int_store
[0];
28354 if (TARGET_PARTIAL_REG_DEPENDENCY
28355 && optimize_function_for_speed_p (cfun
))
28356 cost
= ix86_cost
->movzbl_load
;
28358 cost
= ix86_cost
->int_load
[0];
28360 return MAX (cost
, ix86_cost
->int_store
[0]);
28366 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
28368 return ix86_cost
->movzbl_load
;
28370 return ix86_cost
->int_store
[0] + 4;
28375 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
28376 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
28378 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
28379 if (mode
== TFmode
)
28382 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
28384 cost
= ix86_cost
->int_load
[2];
28386 cost
= ix86_cost
->int_store
[2];
28387 return (cost
* (((int) GET_MODE_SIZE (mode
)
28388 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
28393 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
28396 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
28400 /* Return the cost of moving data from a register in class CLASS1 to
28401 one in class CLASS2.
28403 It is not required that the cost always equal 2 when FROM is the same as TO;
28404 on some machines it is expensive to move between registers if they are not
28405 general registers. */
28408 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
28409 reg_class_t class2_i
)
28411 enum reg_class class1
= (enum reg_class
) class1_i
;
28412 enum reg_class class2
= (enum reg_class
) class2_i
;
28414 /* In case we require secondary memory, compute cost of the store followed
28415 by load. In order to avoid bad register allocation choices, we need
28416 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28418 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
28422 cost
+= inline_memory_move_cost (mode
, class1
, 2);
28423 cost
+= inline_memory_move_cost (mode
, class2
, 2);
28425 /* In case of copying from general_purpose_register we may emit multiple
28426 stores followed by single load causing memory size mismatch stall.
28427 Count this as arbitrarily high cost of 20. */
28428 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
28431 /* In the case of FP/MMX moves, the registers actually overlap, and we
28432 have to switch modes in order to treat them differently. */
28433 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
28434 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
28440 /* Moves between SSE/MMX and integer unit are expensive. */
28441 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
28442 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
28444 /* ??? By keeping returned value relatively high, we limit the number
28445 of moves between integer and MMX/SSE registers for all targets.
28446 Additionally, high value prevents problem with x86_modes_tieable_p(),
28447 where integer modes in MMX/SSE registers are not tieable
28448 because of missing QImode and HImode moves to, from or between
28449 MMX/SSE registers. */
28450 return MAX (8, ix86_cost
->mmxsse_to_integer
);
28452 if (MAYBE_FLOAT_CLASS_P (class1
))
28453 return ix86_cost
->fp_move
;
28454 if (MAYBE_SSE_CLASS_P (class1
))
28455 return ix86_cost
->sse_move
;
28456 if (MAYBE_MMX_CLASS_P (class1
))
28457 return ix86_cost
->mmx_move
;
28461 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
28464 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
28466 /* Flags and only flags can only hold CCmode values. */
28467 if (CC_REGNO_P (regno
))
28468 return GET_MODE_CLASS (mode
) == MODE_CC
;
28469 if (GET_MODE_CLASS (mode
) == MODE_CC
28470 || GET_MODE_CLASS (mode
) == MODE_RANDOM
28471 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
28473 if (FP_REGNO_P (regno
))
28474 return VALID_FP_MODE_P (mode
);
28475 if (SSE_REGNO_P (regno
))
28477 /* We implement the move patterns for all vector modes into and
28478 out of SSE registers, even when no operation instructions
28479 are available. OImode move is available only when AVX is
28481 return ((TARGET_AVX
&& mode
== OImode
)
28482 || VALID_AVX256_REG_MODE (mode
)
28483 || VALID_SSE_REG_MODE (mode
)
28484 || VALID_SSE2_REG_MODE (mode
)
28485 || VALID_MMX_REG_MODE (mode
)
28486 || VALID_MMX_REG_MODE_3DNOW (mode
));
28488 if (MMX_REGNO_P (regno
))
28490 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28491 so if the register is available at all, then we can move data of
28492 the given mode into or out of it. */
28493 return (VALID_MMX_REG_MODE (mode
)
28494 || VALID_MMX_REG_MODE_3DNOW (mode
));
28497 if (mode
== QImode
)
28499 /* Take care for QImode values - they can be in non-QI regs,
28500 but then they do cause partial register stalls. */
28501 if (regno
<= BX_REG
|| TARGET_64BIT
)
28503 if (!TARGET_PARTIAL_REG_STALL
)
28505 return reload_in_progress
|| reload_completed
;
28507 /* We handle both integer and floats in the general purpose registers. */
28508 else if (VALID_INT_MODE_P (mode
))
28510 else if (VALID_FP_MODE_P (mode
))
28512 else if (VALID_DFP_MODE_P (mode
))
28514 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28515 on to use that value in smaller contexts, this can easily force a
28516 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28517 supporting DImode, allow it. */
28518 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
28524 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28525 tieable integer mode. */
28528 ix86_tieable_integer_mode_p (enum machine_mode mode
)
28537 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
28540 return TARGET_64BIT
;
28547 /* Return true if MODE1 is accessible in a register that can hold MODE2
28548 without copying. That is, all register classes that can hold MODE2
28549 can also hold MODE1. */
28552 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
28554 if (mode1
== mode2
)
28557 if (ix86_tieable_integer_mode_p (mode1
)
28558 && ix86_tieable_integer_mode_p (mode2
))
28561 /* MODE2 being XFmode implies fp stack or general regs, which means we
28562 can tie any smaller floating point modes to it. Note that we do not
28563 tie this with TFmode. */
28564 if (mode2
== XFmode
)
28565 return mode1
== SFmode
|| mode1
== DFmode
;
28567 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28568 that we can tie it with SFmode. */
28569 if (mode2
== DFmode
)
28570 return mode1
== SFmode
;
28572 /* If MODE2 is only appropriate for an SSE register, then tie with
28573 any other mode acceptable to SSE registers. */
28574 if (GET_MODE_SIZE (mode2
) == 16
28575 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
28576 return (GET_MODE_SIZE (mode1
) == 16
28577 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
28579 /* If MODE2 is appropriate for an MMX register, then tie
28580 with any other mode acceptable to MMX registers. */
28581 if (GET_MODE_SIZE (mode2
) == 8
28582 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
28583 return (GET_MODE_SIZE (mode1
) == 8
28584 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
28589 /* Compute a (partial) cost for rtx X. Return true if the complete
28590 cost has been computed, and false if subexpressions should be
28591 scanned. In either case, *TOTAL contains the cost result. */
28594 ix86_rtx_costs (rtx x
, int code
, int outer_code_i
, int *total
, bool speed
)
28596 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
28597 enum machine_mode mode
= GET_MODE (x
);
28598 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
28606 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
28608 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
28610 else if (flag_pic
&& SYMBOLIC_CONST (x
)
28612 || (!GET_CODE (x
) != LABEL_REF
28613 && (GET_CODE (x
) != SYMBOL_REF
28614 || !SYMBOL_REF_LOCAL_P (x
)))))
28621 if (mode
== VOIDmode
)
28624 switch (standard_80387_constant_p (x
))
28629 default: /* Other constants */
28634 /* Start with (MEM (SYMBOL_REF)), since that's where
28635 it'll probably end up. Add a penalty for size. */
28636 *total
= (COSTS_N_INSNS (1)
28637 + (flag_pic
!= 0 && !TARGET_64BIT
)
28638 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
28644 /* The zero extensions is often completely free on x86_64, so make
28645 it as cheap as possible. */
28646 if (TARGET_64BIT
&& mode
== DImode
28647 && GET_MODE (XEXP (x
, 0)) == SImode
)
28649 else if (TARGET_ZERO_EXTEND_WITH_AND
)
28650 *total
= cost
->add
;
28652 *total
= cost
->movzx
;
28656 *total
= cost
->movsx
;
28660 if (CONST_INT_P (XEXP (x
, 1))
28661 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
28663 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
28666 *total
= cost
->add
;
28669 if ((value
== 2 || value
== 3)
28670 && cost
->lea
<= cost
->shift_const
)
28672 *total
= cost
->lea
;
28682 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
28684 if (CONST_INT_P (XEXP (x
, 1)))
28686 if (INTVAL (XEXP (x
, 1)) > 32)
28687 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
28689 *total
= cost
->shift_const
* 2;
28693 if (GET_CODE (XEXP (x
, 1)) == AND
)
28694 *total
= cost
->shift_var
* 2;
28696 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
28701 if (CONST_INT_P (XEXP (x
, 1)))
28702 *total
= cost
->shift_const
;
28704 *total
= cost
->shift_var
;
28712 gcc_assert (FLOAT_MODE_P (mode
));
28713 gcc_assert (TARGET_FMA
|| TARGET_FMA4
);
28715 /* ??? SSE scalar/vector cost should be used here. */
28716 /* ??? Bald assumption that fma has the same cost as fmul. */
28717 *total
= cost
->fmul
;
28718 *total
+= rtx_cost (XEXP (x
, 1), FMA
, speed
);
28720 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
28722 if (GET_CODE (sub
) == NEG
)
28724 *total
+= rtx_cost (sub
, FMA
, speed
);
28727 if (GET_CODE (sub
) == NEG
)
28729 *total
+= rtx_cost (sub
, FMA
, speed
);
28734 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
28736 /* ??? SSE scalar cost should be used here. */
28737 *total
= cost
->fmul
;
28740 else if (X87_FLOAT_MODE_P (mode
))
28742 *total
= cost
->fmul
;
28745 else if (FLOAT_MODE_P (mode
))
28747 /* ??? SSE vector cost should be used here. */
28748 *total
= cost
->fmul
;
28753 rtx op0
= XEXP (x
, 0);
28754 rtx op1
= XEXP (x
, 1);
28756 if (CONST_INT_P (XEXP (x
, 1)))
28758 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
28759 for (nbits
= 0; value
!= 0; value
&= value
- 1)
28763 /* This is arbitrary. */
28766 /* Compute costs correctly for widening multiplication. */
28767 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
28768 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
28769 == GET_MODE_SIZE (mode
))
28771 int is_mulwiden
= 0;
28772 enum machine_mode inner_mode
= GET_MODE (op0
);
28774 if (GET_CODE (op0
) == GET_CODE (op1
))
28775 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
28776 else if (CONST_INT_P (op1
))
28778 if (GET_CODE (op0
) == SIGN_EXTEND
)
28779 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
28782 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
28786 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
28789 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
28790 + nbits
* cost
->mult_bit
28791 + rtx_cost (op0
, outer_code
, speed
) + rtx_cost (op1
, outer_code
, speed
));
28800 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
28801 /* ??? SSE cost should be used here. */
28802 *total
= cost
->fdiv
;
28803 else if (X87_FLOAT_MODE_P (mode
))
28804 *total
= cost
->fdiv
;
28805 else if (FLOAT_MODE_P (mode
))
28806 /* ??? SSE vector cost should be used here. */
28807 *total
= cost
->fdiv
;
28809 *total
= cost
->divide
[MODE_INDEX (mode
)];
28813 if (GET_MODE_CLASS (mode
) == MODE_INT
28814 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
28816 if (GET_CODE (XEXP (x
, 0)) == PLUS
28817 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
28818 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
28819 && CONSTANT_P (XEXP (x
, 1)))
28821 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
28822 if (val
== 2 || val
== 4 || val
== 8)
28824 *total
= cost
->lea
;
28825 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
, speed
);
28826 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
28827 outer_code
, speed
);
28828 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, speed
);
28832 else if (GET_CODE (XEXP (x
, 0)) == MULT
28833 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
28835 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
28836 if (val
== 2 || val
== 4 || val
== 8)
28838 *total
= cost
->lea
;
28839 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, speed
);
28840 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, speed
);
28844 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
28846 *total
= cost
->lea
;
28847 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, speed
);
28848 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
, speed
);
28849 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, speed
);
28856 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
28858 /* ??? SSE cost should be used here. */
28859 *total
= cost
->fadd
;
28862 else if (X87_FLOAT_MODE_P (mode
))
28864 *total
= cost
->fadd
;
28867 else if (FLOAT_MODE_P (mode
))
28869 /* ??? SSE vector cost should be used here. */
28870 *total
= cost
->fadd
;
28878 if (!TARGET_64BIT
&& mode
== DImode
)
28880 *total
= (cost
->add
* 2
28881 + (rtx_cost (XEXP (x
, 0), outer_code
, speed
)
28882 << (GET_MODE (XEXP (x
, 0)) != DImode
))
28883 + (rtx_cost (XEXP (x
, 1), outer_code
, speed
)
28884 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
28890 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
28892 /* ??? SSE cost should be used here. */
28893 *total
= cost
->fchs
;
28896 else if (X87_FLOAT_MODE_P (mode
))
28898 *total
= cost
->fchs
;
28901 else if (FLOAT_MODE_P (mode
))
28903 /* ??? SSE vector cost should be used here. */
28904 *total
= cost
->fchs
;
28910 if (!TARGET_64BIT
&& mode
== DImode
)
28911 *total
= cost
->add
* 2;
28913 *total
= cost
->add
;
28917 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
28918 && XEXP (XEXP (x
, 0), 1) == const1_rtx
28919 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
28920 && XEXP (x
, 1) == const0_rtx
)
28922 /* This kind of construct is implemented using test[bwl].
28923 Treat it as if we had an AND. */
28924 *total
= (cost
->add
28925 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, speed
)
28926 + rtx_cost (const1_rtx
, outer_code
, speed
));
28932 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
28937 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
28938 /* ??? SSE cost should be used here. */
28939 *total
= cost
->fabs
;
28940 else if (X87_FLOAT_MODE_P (mode
))
28941 *total
= cost
->fabs
;
28942 else if (FLOAT_MODE_P (mode
))
28943 /* ??? SSE vector cost should be used here. */
28944 *total
= cost
->fabs
;
28948 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
28949 /* ??? SSE cost should be used here. */
28950 *total
= cost
->fsqrt
;
28951 else if (X87_FLOAT_MODE_P (mode
))
28952 *total
= cost
->fsqrt
;
28953 else if (FLOAT_MODE_P (mode
))
28954 /* ??? SSE vector cost should be used here. */
28955 *total
= cost
->fsqrt
;
28959 if (XINT (x
, 1) == UNSPEC_TP
)
28966 case VEC_DUPLICATE
:
28967 /* ??? Assume all of these vector manipulation patterns are
28968 recognizable. In which case they all pretty much have the
28970 *total
= COSTS_N_INSNS (1);
28980 static int current_machopic_label_num
;
28982 /* Given a symbol name and its associated stub, write out the
28983 definition of the stub. */
28986 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
28988 unsigned int length
;
28989 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
28990 int label
= ++current_machopic_label_num
;
28992 /* For 64-bit we shouldn't get here. */
28993 gcc_assert (!TARGET_64BIT
);
28995 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
28996 symb
= targetm
.strip_name_encoding (symb
);
28998 length
= strlen (stub
);
28999 binder_name
= XALLOCAVEC (char, length
+ 32);
29000 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
29002 length
= strlen (symb
);
29003 symbol_name
= XALLOCAVEC (char, length
+ 32);
29004 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
29006 sprintf (lazy_ptr_name
, "L%d$lz", label
);
29008 if (MACHOPIC_ATT_STUB
)
29009 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
29010 else if (MACHOPIC_PURE
)
29012 if (TARGET_DEEP_BRANCH_PREDICTION
)
29013 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
29015 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
29018 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
29020 fprintf (file
, "%s:\n", stub
);
29021 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
29023 if (MACHOPIC_ATT_STUB
)
29025 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29027 else if (MACHOPIC_PURE
)
29030 if (TARGET_DEEP_BRANCH_PREDICTION
)
29032 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29033 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
29034 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29035 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label
, lazy_ptr_name
, label
);
29039 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
29040 fprintf (file
, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label
, label
);
29041 fprintf (file
, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name
, label
);
29043 fprintf (file
, "\tjmp\t*%%ecx\n");
29046 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
29048 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29049 it needs no stub-binding-helper. */
29050 if (MACHOPIC_ATT_STUB
)
29053 fprintf (file
, "%s:\n", binder_name
);
29057 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
29058 fprintf (file
, "\tpushl\t%%ecx\n");
29061 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
29063 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
29065 /* N.B. Keep the correspondence of these
29066 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29067 old-pic/new-pic/non-pic stubs; altering this will break
29068 compatibility with existing dylibs. */
29072 if (TARGET_DEEP_BRANCH_PREDICTION
)
29073 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29074 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
29076 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */
29077 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
29080 /* 16-byte -mdynamic-no-pic stub. */
29081 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
29083 fprintf (file
, "%s:\n", lazy_ptr_name
);
29084 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
29085 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
29087 #endif /* TARGET_MACHO */
29089 /* Order the registers for register allocator. */
29092 x86_order_regs_for_local_alloc (void)
29097 /* First allocate the local general purpose registers. */
29098 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
29099 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
29100 reg_alloc_order
[pos
++] = i
;
29102 /* Global general purpose registers. */
29103 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
29104 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
29105 reg_alloc_order
[pos
++] = i
;
29107 /* x87 registers come first in case we are doing FP math
29109 if (!TARGET_SSE_MATH
)
29110 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
29111 reg_alloc_order
[pos
++] = i
;
29113 /* SSE registers. */
29114 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
29115 reg_alloc_order
[pos
++] = i
;
29116 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
29117 reg_alloc_order
[pos
++] = i
;
29119 /* x87 registers. */
29120 if (TARGET_SSE_MATH
)
29121 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
29122 reg_alloc_order
[pos
++] = i
;
29124 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
29125 reg_alloc_order
[pos
++] = i
;
29127 /* Initialize the rest of array as we do not allocate some registers
29129 while (pos
< FIRST_PSEUDO_REGISTER
)
29130 reg_alloc_order
[pos
++] = 0;
29133 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29134 in struct attribute_spec handler. */
29136 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
29138 int flags ATTRIBUTE_UNUSED
,
29139 bool *no_add_attrs
)
29141 if (TREE_CODE (*node
) != FUNCTION_TYPE
29142 && TREE_CODE (*node
) != METHOD_TYPE
29143 && TREE_CODE (*node
) != FIELD_DECL
29144 && TREE_CODE (*node
) != TYPE_DECL
)
29146 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
29148 *no_add_attrs
= true;
29153 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
29155 *no_add_attrs
= true;
29158 if (is_attribute_p ("callee_pop_aggregate_return", name
))
29162 cst
= TREE_VALUE (args
);
29163 if (TREE_CODE (cst
) != INTEGER_CST
)
29165 warning (OPT_Wattributes
,
29166 "%qE attribute requires an integer constant argument",
29168 *no_add_attrs
= true;
29170 else if (compare_tree_int (cst
, 0) != 0
29171 && compare_tree_int (cst
, 1) != 0)
29173 warning (OPT_Wattributes
,
29174 "argument to %qE attribute is neither zero, nor one",
29176 *no_add_attrs
= true;
29185 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
29186 struct attribute_spec.handler. */
29188 ix86_handle_abi_attribute (tree
*node
, tree name
,
29189 tree args ATTRIBUTE_UNUSED
,
29190 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
29192 if (TREE_CODE (*node
) != FUNCTION_TYPE
29193 && TREE_CODE (*node
) != METHOD_TYPE
29194 && TREE_CODE (*node
) != FIELD_DECL
29195 && TREE_CODE (*node
) != TYPE_DECL
)
29197 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
29199 *no_add_attrs
= true;
29204 warning (OPT_Wattributes
, "%qE attribute only available for 64-bit",
29206 *no_add_attrs
= true;
29210 /* Can combine regparm with all attributes but fastcall. */
29211 if (is_attribute_p ("ms_abi", name
))
29213 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
29215 error ("ms_abi and sysv_abi attributes are not compatible");
29220 else if (is_attribute_p ("sysv_abi", name
))
29222 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
29224 error ("ms_abi and sysv_abi attributes are not compatible");
29233 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29234 struct attribute_spec.handler. */
29236 ix86_handle_struct_attribute (tree
*node
, tree name
,
29237 tree args ATTRIBUTE_UNUSED
,
29238 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
29241 if (DECL_P (*node
))
29243 if (TREE_CODE (*node
) == TYPE_DECL
)
29244 type
= &TREE_TYPE (*node
);
29249 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
29250 || TREE_CODE (*type
) == UNION_TYPE
)))
29252 warning (OPT_Wattributes
, "%qE attribute ignored",
29254 *no_add_attrs
= true;
29257 else if ((is_attribute_p ("ms_struct", name
)
29258 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
29259 || ((is_attribute_p ("gcc_struct", name
)
29260 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
29262 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
29264 *no_add_attrs
= true;
29271 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
29272 tree args ATTRIBUTE_UNUSED
,
29273 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
29275 if (TREE_CODE (*node
) != FUNCTION_DECL
)
29277 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
29279 *no_add_attrs
= true;
29285 ix86_ms_bitfield_layout_p (const_tree record_type
)
29287 return ((TARGET_MS_BITFIELD_LAYOUT
29288 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
29289 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
29292 /* Returns an expression indicating where the this parameter is
29293 located on entry to the FUNCTION. */
29296 x86_this_parameter (tree function
)
29298 tree type
= TREE_TYPE (function
);
29299 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
29304 const int *parm_regs
;
29306 if (ix86_function_type_abi (type
) == MS_ABI
)
29307 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
29309 parm_regs
= x86_64_int_parameter_registers
;
29310 return gen_rtx_REG (DImode
, parm_regs
[aggr
]);
29313 nregs
= ix86_function_regparm (type
, function
);
29315 if (nregs
> 0 && !stdarg_p (type
))
29319 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
29320 regno
= aggr
? DX_REG
: CX_REG
;
29321 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type
)))
29325 return gen_rtx_MEM (SImode
,
29326 plus_constant (stack_pointer_rtx
, 4));
29335 return gen_rtx_MEM (SImode
,
29336 plus_constant (stack_pointer_rtx
, 4));
29339 return gen_rtx_REG (SImode
, regno
);
29342 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, aggr
? 8 : 4));
29345 /* Determine whether x86_output_mi_thunk can succeed. */
29348 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
29349 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
29350 HOST_WIDE_INT vcall_offset
, const_tree function
)
29352 /* 64-bit can handle anything. */
29356 /* For 32-bit, everything's fine if we have one free register. */
29357 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
29360 /* Need a free register for vcall_offset. */
29364 /* Need a free register for GOT references. */
29365 if (flag_pic
&& !targetm
.binds_local_p (function
))
29368 /* Otherwise ok. */
29372 /* Output the assembler code for a thunk function. THUNK_DECL is the
29373 declaration for the thunk function itself, FUNCTION is the decl for
29374 the target function. DELTA is an immediate constant offset to be
29375 added to THIS. If VCALL_OFFSET is nonzero, the word at
29376 *(*this + vcall_offset) should be added to THIS. */
29379 x86_output_mi_thunk (FILE *file
,
29380 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
29381 HOST_WIDE_INT vcall_offset
, tree function
)
29384 rtx this_param
= x86_this_parameter (function
);
29387 /* Make sure unwind info is emitted for the thunk if needed. */
29388 final_start_function (emit_barrier (), file
, 1);
29390 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29391 pull it in now and let DELTA benefit. */
29392 if (REG_P (this_param
))
29393 this_reg
= this_param
;
29394 else if (vcall_offset
)
29396 /* Put the this parameter into %eax. */
29397 xops
[0] = this_param
;
29398 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
29399 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops
);
29402 this_reg
= NULL_RTX
;
29404 /* Adjust the this parameter by a fixed constant. */
29407 xops
[0] = GEN_INT (delta
);
29408 xops
[1] = this_reg
? this_reg
: this_param
;
29411 if (!x86_64_general_operand (xops
[0], DImode
))
29413 tmp
= gen_rtx_REG (DImode
, R10_REG
);
29415 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
29417 xops
[1] = this_param
;
29419 if (x86_maybe_negate_const_int (&xops
[0], DImode
))
29420 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops
);
29422 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
29424 else if (x86_maybe_negate_const_int (&xops
[0], SImode
))
29425 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops
);
29427 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
29430 /* Adjust the this parameter by a value stored in the vtable. */
29434 tmp
= gen_rtx_REG (DImode
, R10_REG
);
29437 int tmp_regno
= CX_REG
;
29438 if (lookup_attribute ("fastcall",
29439 TYPE_ATTRIBUTES (TREE_TYPE (function
)))
29440 || lookup_attribute ("thiscall",
29441 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
29442 tmp_regno
= AX_REG
;
29443 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
29446 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
29448 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops
);
29450 /* Adjust the this parameter. */
29451 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
29452 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
29454 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
29455 xops
[0] = GEN_INT (vcall_offset
);
29457 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
29458 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
29460 xops
[1] = this_reg
;
29461 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops
);
29464 /* If necessary, drop THIS back to its stack slot. */
29465 if (this_reg
&& this_reg
!= this_param
)
29467 xops
[0] = this_reg
;
29468 xops
[1] = this_param
;
29469 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops
);
29472 xops
[0] = XEXP (DECL_RTL (function
), 0);
29475 if (!flag_pic
|| targetm
.binds_local_p (function
)
29476 || DEFAULT_ABI
== MS_ABI
)
29477 output_asm_insn ("jmp\t%P0", xops
);
29478 /* All thunks should be in the same object as their target,
29479 and thus binds_local_p should be true. */
29480 else if (TARGET_64BIT
&& cfun
->machine
->call_abi
== MS_ABI
)
29481 gcc_unreachable ();
29484 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
29485 tmp
= gen_rtx_CONST (Pmode
, tmp
);
29486 tmp
= gen_rtx_MEM (QImode
, tmp
);
29488 output_asm_insn ("jmp\t%A0", xops
);
29493 if (!flag_pic
|| targetm
.binds_local_p (function
))
29494 output_asm_insn ("jmp\t%P0", xops
);
29499 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
29500 if (TARGET_MACHO_BRANCH_ISLANDS
)
29501 sym_ref
= (gen_rtx_SYMBOL_REF
29503 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
29504 tmp
= gen_rtx_MEM (QImode
, sym_ref
);
29506 output_asm_insn ("jmp\t%0", xops
);
29509 #endif /* TARGET_MACHO */
29511 tmp
= gen_rtx_REG (SImode
, CX_REG
);
29512 output_set_got (tmp
, NULL_RTX
);
29515 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
29516 output_asm_insn ("jmp\t{*}%1", xops
);
29519 final_end_function ();
29523 x86_file_start (void)
29525 default_file_start ();
29527 darwin_file_start ();
29529 if (X86_FILE_START_VERSION_DIRECTIVE
)
29530 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
29531 if (X86_FILE_START_FLTUSED
)
29532 fputs ("\t.global\t__fltused\n", asm_out_file
);
29533 if (ix86_asm_dialect
== ASM_INTEL
)
29534 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
29538 x86_field_alignment (tree field
, int computed
)
29540 enum machine_mode mode
;
29541 tree type
= TREE_TYPE (field
);
29543 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
29545 mode
= TYPE_MODE (strip_array_types (type
));
29546 if (mode
== DFmode
|| mode
== DCmode
29547 || GET_MODE_CLASS (mode
) == MODE_INT
29548 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
29549 return MIN (32, computed
);
29553 /* Output assembler code to FILE to increment profiler label # LABELNO
29554 for profiling a function entry. */
29556 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
29558 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
29563 #ifndef NO_PROFILE_COUNTERS
29564 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
29567 if (DEFAULT_ABI
== SYSV_ABI
&& flag_pic
)
29568 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
29570 fprintf (file
, "\tcall\t%s\n", mcount_name
);
29574 #ifndef NO_PROFILE_COUNTERS
29575 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
29578 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
29582 #ifndef NO_PROFILE_COUNTERS
29583 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
29586 fprintf (file
, "\tcall\t%s\n", mcount_name
);
29590 /* We don't have exact information about the insn sizes, but we may assume
29591 quite safely that we are informed about all 1 byte insns and memory
29592 address sizes. This is enough to eliminate unnecessary padding in
29596 min_insn_size (rtx insn
)
29600 if (!INSN_P (insn
) || !active_insn_p (insn
))
29603 /* Discard alignments we've emit and jump instructions. */
29604 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
29605 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
29607 if (JUMP_TABLE_DATA_P (insn
))
29610 /* Important case - calls are always 5 bytes.
29611 It is common to have many calls in the row. */
29613 && symbolic_reference_mentioned_p (PATTERN (insn
))
29614 && !SIBLING_CALL_P (insn
))
29616 len
= get_attr_length (insn
);
29620 /* For normal instructions we rely on get_attr_length being exact,
29621 with a few exceptions. */
29622 if (!JUMP_P (insn
))
29624 enum attr_type type
= get_attr_type (insn
);
29629 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
29630 || asm_noperands (PATTERN (insn
)) >= 0)
29637 /* Otherwise trust get_attr_length. */
29641 l
= get_attr_length_address (insn
);
29642 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
29651 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29653 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
29657 ix86_avoid_jump_mispredicts (void)
29659 rtx insn
, start
= get_insns ();
29660 int nbytes
= 0, njumps
= 0;
29663 /* Look for all minimal intervals of instructions containing 4 jumps.
29664 The intervals are bounded by START and INSN. NBYTES is the total
29665 size of instructions in the interval including INSN and not including
29666 START. When the NBYTES is smaller than 16 bytes, it is possible
29667 that the end of START and INSN ends up in the same 16byte page.
29669 The smallest offset in the page INSN can start is the case where START
29670 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
29671 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
29673 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
29677 if (LABEL_P (insn
))
29679 int align
= label_to_alignment (insn
);
29680 int max_skip
= label_to_max_skip (insn
);
29684 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
29685 already in the current 16 byte page, because otherwise
29686 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
29687 bytes to reach 16 byte boundary. */
29689 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
29692 fprintf (dump_file
, "Label %i with max_skip %i\n",
29693 INSN_UID (insn
), max_skip
);
29696 while (nbytes
+ max_skip
>= 16)
29698 start
= NEXT_INSN (start
);
29699 if ((JUMP_P (start
)
29700 && GET_CODE (PATTERN (start
)) != ADDR_VEC
29701 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
29703 njumps
--, isjump
= 1;
29706 nbytes
-= min_insn_size (start
);
29712 min_size
= min_insn_size (insn
);
29713 nbytes
+= min_size
;
29715 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
29716 INSN_UID (insn
), min_size
);
29718 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
29719 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
29727 start
= NEXT_INSN (start
);
29728 if ((JUMP_P (start
)
29729 && GET_CODE (PATTERN (start
)) != ADDR_VEC
29730 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
29732 njumps
--, isjump
= 1;
29735 nbytes
-= min_insn_size (start
);
29737 gcc_assert (njumps
>= 0);
29739 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
29740 INSN_UID (start
), INSN_UID (insn
), nbytes
);
29742 if (njumps
== 3 && isjump
&& nbytes
< 16)
29744 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
29747 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
29748 INSN_UID (insn
), padsize
);
29749 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
29755 /* AMD Athlon works faster
29756 when RET is not destination of conditional jump or directly preceded
29757 by other jump instruction. We avoid the penalty by inserting NOP just
29758 before the RET instructions in such cases. */
29760 ix86_pad_returns (void)
29765 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
29767 basic_block bb
= e
->src
;
29768 rtx ret
= BB_END (bb
);
29770 bool replace
= false;
29772 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
29773 || optimize_bb_for_size_p (bb
))
29775 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
29776 if (active_insn_p (prev
) || LABEL_P (prev
))
29778 if (prev
&& LABEL_P (prev
))
29783 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
29784 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
29785 && !(e
->flags
& EDGE_FALLTHRU
))
29790 prev
= prev_active_insn (ret
);
29792 && ((JUMP_P (prev
) && any_condjump_p (prev
))
29795 /* Empty functions get branch mispredict even when
29796 the jump destination is not visible to us. */
29797 if (!prev
&& !optimize_function_for_size_p (cfun
))
29802 emit_jump_insn_before (gen_return_internal_long (), ret
);
29808 /* Count the minimum number of instructions in BB. Return 4 if the
29809 number of instructions >= 4. */
29812 ix86_count_insn_bb (basic_block bb
)
29815 int insn_count
= 0;
29817 /* Count number of instructions in this block. Return 4 if the number
29818 of instructions >= 4. */
29819 FOR_BB_INSNS (bb
, insn
)
29821 /* Only happen in exit blocks. */
29823 && GET_CODE (PATTERN (insn
)) == RETURN
)
29826 if (NONDEBUG_INSN_P (insn
)
29827 && GET_CODE (PATTERN (insn
)) != USE
29828 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
29831 if (insn_count
>= 4)
29840 /* Count the minimum number of instructions in code path in BB.
29841 Return 4 if the number of instructions >= 4. */
29844 ix86_count_insn (basic_block bb
)
29848 int min_prev_count
;
29850 /* Only bother counting instructions along paths with no
29851 more than 2 basic blocks between entry and exit. Given
29852 that BB has an edge to exit, determine if a predecessor
29853 of BB has an edge from entry. If so, compute the number
29854 of instructions in the predecessor block. If there
29855 happen to be multiple such blocks, compute the minimum. */
29856 min_prev_count
= 4;
29857 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
29860 edge_iterator prev_ei
;
29862 if (e
->src
== ENTRY_BLOCK_PTR
)
29864 min_prev_count
= 0;
29867 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
29869 if (prev_e
->src
== ENTRY_BLOCK_PTR
)
29871 int count
= ix86_count_insn_bb (e
->src
);
29872 if (count
< min_prev_count
)
29873 min_prev_count
= count
;
29879 if (min_prev_count
< 4)
29880 min_prev_count
+= ix86_count_insn_bb (bb
);
29882 return min_prev_count
;
29885 /* Pad short funtion to 4 instructions. */
29888 ix86_pad_short_function (void)
29893 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
29895 rtx ret
= BB_END (e
->src
);
29896 if (JUMP_P (ret
) && GET_CODE (PATTERN (ret
)) == RETURN
)
29898 int insn_count
= ix86_count_insn (e
->src
);
29900 /* Pad short function. */
29901 if (insn_count
< 4)
29905 /* Find epilogue. */
29908 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
29909 insn
= PREV_INSN (insn
);
29914 /* Two NOPs count as one instruction. */
29915 insn_count
= 2 * (4 - insn_count
);
29916 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
29922 /* Implement machine specific optimizations. We implement padding of returns
29923 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
29927 /* We are freeing block_for_insn in the toplev to keep compatibility
29928 with old MDEP_REORGS that are not CFG based. Recompute it now. */
29929 compute_bb_for_insn ();
29931 if (optimize
&& optimize_function_for_speed_p (cfun
))
29933 if (TARGET_PAD_SHORT_FUNCTION
)
29934 ix86_pad_short_function ();
29935 else if (TARGET_PAD_RETURNS
)
29936 ix86_pad_returns ();
29937 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29938 if (TARGET_FOUR_JUMP_LIMIT
)
29939 ix86_avoid_jump_mispredicts ();
29943 /* Run the vzeroupper optimization if needed. */
29944 if (TARGET_VZEROUPPER
)
29945 move_or_delete_vzeroupper ();
29948 /* Return nonzero when QImode register that must be represented via REX prefix
29951 x86_extended_QIreg_mentioned_p (rtx insn
)
29954 extract_insn_cached (insn
);
29955 for (i
= 0; i
< recog_data
.n_operands
; i
++)
29956 if (REG_P (recog_data
.operand
[i
])
29957 && REGNO (recog_data
.operand
[i
]) > BX_REG
)
29962 /* Return nonzero when P points to register encoded via REX prefix.
29963 Called via for_each_rtx. */
29965 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
29967 unsigned int regno
;
29970 regno
= REGNO (*p
);
29971 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
29974 /* Return true when INSN mentions register that must be encoded using REX
29977 x86_extended_reg_mentioned_p (rtx insn
)
29979 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
29980 extended_reg_mentioned_1
, NULL
);
29983 /* If profitable, negate (without causing overflow) integer constant
29984 of mode MODE at location LOC. Return true in this case. */
29986 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
29990 if (!CONST_INT_P (*loc
))
29996 /* DImode x86_64 constants must fit in 32 bits. */
29997 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
30008 gcc_unreachable ();
30011 /* Avoid overflows. */
30012 if (mode_signbit_p (mode
, *loc
))
30015 val
= INTVAL (*loc
);
30017 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
30018 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
30019 if ((val
< 0 && val
!= -128)
30022 *loc
= GEN_INT (-val
);
30029 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30030 optabs would emit if we didn't have TFmode patterns. */
30033 x86_emit_floatuns (rtx operands
[2])
30035 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
30036 enum machine_mode mode
, inmode
;
30038 inmode
= GET_MODE (operands
[1]);
30039 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
30042 in
= force_reg (inmode
, operands
[1]);
30043 mode
= GET_MODE (out
);
30044 neglab
= gen_label_rtx ();
30045 donelab
= gen_label_rtx ();
30046 f0
= gen_reg_rtx (mode
);
30048 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
30050 expand_float (out
, in
, 0);
30052 emit_jump_insn (gen_jump (donelab
));
30055 emit_label (neglab
);
30057 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
30059 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
30061 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
30063 expand_float (f0
, i0
, 0);
30065 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
30067 emit_label (donelab
);
30070 /* AVX does not support 32-byte integer vector operations,
30071 thus the longest vector we are faced with is V16QImode. */
30072 #define MAX_VECT_LEN 16
30074 struct expand_vec_perm_d
30076 rtx target
, op0
, op1
;
30077 unsigned char perm
[MAX_VECT_LEN
];
30078 enum machine_mode vmode
;
30079 unsigned char nelt
;
30083 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
30084 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
30086 /* Get a vector mode of the same size as the original but with elements
30087 twice as wide. This is only guaranteed to apply to integral vectors. */
30089 static inline enum machine_mode
30090 get_mode_wider_vector (enum machine_mode o
)
30092 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30093 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
30094 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
30095 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
30099 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30100 with all elements equal to VAR. Return true if successful. */
30103 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
30104 rtx target
, rtx val
)
30127 /* First attempt to recognize VAL as-is. */
30128 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
30129 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
30130 if (recog_memoized (insn
) < 0)
30133 /* If that fails, force VAL into a register. */
30136 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
30137 seq
= get_insns ();
30140 emit_insn_before (seq
, insn
);
30142 ok
= recog_memoized (insn
) >= 0;
30151 if (TARGET_SSE
|| TARGET_3DNOW_A
)
30155 val
= gen_lowpart (SImode
, val
);
30156 x
= gen_rtx_TRUNCATE (HImode
, val
);
30157 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
30158 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
30171 struct expand_vec_perm_d dperm
;
30175 memset (&dperm
, 0, sizeof (dperm
));
30176 dperm
.target
= target
;
30177 dperm
.vmode
= mode
;
30178 dperm
.nelt
= GET_MODE_NUNITS (mode
);
30179 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
30181 /* Extend to SImode using a paradoxical SUBREG. */
30182 tmp1
= gen_reg_rtx (SImode
);
30183 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
30185 /* Insert the SImode value as low element of a V4SImode vector. */
30186 tmp2
= gen_lowpart (V4SImode
, dperm
.op0
);
30187 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
30189 ok
= (expand_vec_perm_1 (&dperm
)
30190 || expand_vec_perm_broadcast_1 (&dperm
));
30202 /* Replicate the value once into the next wider mode and recurse. */
30204 enum machine_mode smode
, wsmode
, wvmode
;
30207 smode
= GET_MODE_INNER (mode
);
30208 wvmode
= get_mode_wider_vector (mode
);
30209 wsmode
= GET_MODE_INNER (wvmode
);
30211 val
= convert_modes (wsmode
, smode
, val
, true);
30212 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
30213 GEN_INT (GET_MODE_BITSIZE (smode
)),
30214 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
30215 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
30217 x
= gen_lowpart (wvmode
, target
);
30218 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
30226 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
30227 rtx x
= gen_reg_rtx (hvmode
);
30229 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
30232 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
30233 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
30242 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30243 whose ONE_VAR element is VAR, and other elements are zero. Return true
30247 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
30248 rtx target
, rtx var
, int one_var
)
30250 enum machine_mode vsimode
;
30253 bool use_vector_set
= false;
30258 /* For SSE4.1, we normally use vector set. But if the second
30259 element is zero and inter-unit moves are OK, we use movq
30261 use_vector_set
= (TARGET_64BIT
30263 && !(TARGET_INTER_UNIT_MOVES
30269 use_vector_set
= TARGET_SSE4_1
;
30272 use_vector_set
= TARGET_SSE2
;
30275 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
30282 use_vector_set
= TARGET_AVX
;
30285 /* Use ix86_expand_vector_set in 64bit mode only. */
30286 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
30292 if (use_vector_set
)
30294 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
30295 var
= force_reg (GET_MODE_INNER (mode
), var
);
30296 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
30312 var
= force_reg (GET_MODE_INNER (mode
), var
);
30313 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
30314 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
30319 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
30320 new_target
= gen_reg_rtx (mode
);
30322 new_target
= target
;
30323 var
= force_reg (GET_MODE_INNER (mode
), var
);
30324 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
30325 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
30326 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
30329 /* We need to shuffle the value to the correct position, so
30330 create a new pseudo to store the intermediate result. */
30332 /* With SSE2, we can use the integer shuffle insns. */
30333 if (mode
!= V4SFmode
&& TARGET_SSE2
)
30335 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
30337 GEN_INT (one_var
== 1 ? 0 : 1),
30338 GEN_INT (one_var
== 2 ? 0 : 1),
30339 GEN_INT (one_var
== 3 ? 0 : 1)));
30340 if (target
!= new_target
)
30341 emit_move_insn (target
, new_target
);
30345 /* Otherwise convert the intermediate result to V4SFmode and
30346 use the SSE1 shuffle instructions. */
30347 if (mode
!= V4SFmode
)
30349 tmp
= gen_reg_rtx (V4SFmode
);
30350 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
30355 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
30357 GEN_INT (one_var
== 1 ? 0 : 1),
30358 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
30359 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
30361 if (mode
!= V4SFmode
)
30362 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
30363 else if (tmp
!= target
)
30364 emit_move_insn (target
, tmp
);
30366 else if (target
!= new_target
)
30367 emit_move_insn (target
, new_target
);
30372 vsimode
= V4SImode
;
30378 vsimode
= V2SImode
;
30384 /* Zero extend the variable element to SImode and recurse. */
30385 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
30387 x
= gen_reg_rtx (vsimode
);
30388 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
30390 gcc_unreachable ();
30392 emit_move_insn (target
, gen_lowpart (mode
, x
));
30400 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30401 consisting of the values in VALS. It is known that all elements
30402 except ONE_VAR are constants. Return true if successful. */
30405 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
30406 rtx target
, rtx vals
, int one_var
)
30408 rtx var
= XVECEXP (vals
, 0, one_var
);
30409 enum machine_mode wmode
;
30412 const_vec
= copy_rtx (vals
);
30413 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
30414 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
30422 /* For the two element vectors, it's just as easy to use
30423 the general case. */
30427 /* Use ix86_expand_vector_set in 64bit mode only. */
30450 /* There's no way to set one QImode entry easily. Combine
30451 the variable value with its adjacent constant value, and
30452 promote to an HImode set. */
30453 x
= XVECEXP (vals
, 0, one_var
^ 1);
30456 var
= convert_modes (HImode
, QImode
, var
, true);
30457 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
30458 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
30459 x
= GEN_INT (INTVAL (x
) & 0xff);
30463 var
= convert_modes (HImode
, QImode
, var
, true);
30464 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
30466 if (x
!= const0_rtx
)
30467 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
30468 1, OPTAB_LIB_WIDEN
);
30470 x
= gen_reg_rtx (wmode
);
30471 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
30472 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
30474 emit_move_insn (target
, gen_lowpart (mode
, x
));
30481 emit_move_insn (target
, const_vec
);
30482 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
30486 /* A subroutine of ix86_expand_vector_init_general. Use vector
30487 concatenate to handle the most general case: all values variable,
30488 and none identical. */
30491 ix86_expand_vector_init_concat (enum machine_mode mode
,
30492 rtx target
, rtx
*ops
, int n
)
30494 enum machine_mode cmode
, hmode
= VOIDmode
;
30495 rtx first
[8], second
[4];
30535 gcc_unreachable ();
30538 if (!register_operand (ops
[1], cmode
))
30539 ops
[1] = force_reg (cmode
, ops
[1]);
30540 if (!register_operand (ops
[0], cmode
))
30541 ops
[0] = force_reg (cmode
, ops
[0]);
30542 emit_insn (gen_rtx_SET (VOIDmode
, target
,
30543 gen_rtx_VEC_CONCAT (mode
, ops
[0],
30563 gcc_unreachable ();
30579 gcc_unreachable ();
30584 /* FIXME: We process inputs backward to help RA. PR 36222. */
30587 for (; i
> 0; i
-= 2, j
--)
30589 first
[j
] = gen_reg_rtx (cmode
);
30590 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
30591 ix86_expand_vector_init (false, first
[j
],
30592 gen_rtx_PARALLEL (cmode
, v
));
30598 gcc_assert (hmode
!= VOIDmode
);
30599 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
30601 second
[j
] = gen_reg_rtx (hmode
);
30602 ix86_expand_vector_init_concat (hmode
, second
[j
],
30606 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
30609 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
30613 gcc_unreachable ();
30617 /* A subroutine of ix86_expand_vector_init_general. Use vector
30618 interleave to handle the most general case: all values variable,
30619 and none identical. */
30622 ix86_expand_vector_init_interleave (enum machine_mode mode
,
30623 rtx target
, rtx
*ops
, int n
)
30625 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
30628 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
30629 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
30630 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
30635 gen_load_even
= gen_vec_setv8hi
;
30636 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
30637 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
30638 inner_mode
= HImode
;
30639 first_imode
= V4SImode
;
30640 second_imode
= V2DImode
;
30641 third_imode
= VOIDmode
;
30644 gen_load_even
= gen_vec_setv16qi
;
30645 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
30646 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
30647 inner_mode
= QImode
;
30648 first_imode
= V8HImode
;
30649 second_imode
= V4SImode
;
30650 third_imode
= V2DImode
;
30653 gcc_unreachable ();
30656 for (i
= 0; i
< n
; i
++)
30658 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
30659 op0
= gen_reg_rtx (SImode
);
30660 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
30662 /* Insert the SImode value as low element of V4SImode vector. */
30663 op1
= gen_reg_rtx (V4SImode
);
30664 op0
= gen_rtx_VEC_MERGE (V4SImode
,
30665 gen_rtx_VEC_DUPLICATE (V4SImode
,
30667 CONST0_RTX (V4SImode
),
30669 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
30671 /* Cast the V4SImode vector back to a vector in orignal mode. */
30672 op0
= gen_reg_rtx (mode
);
30673 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
30675 /* Load even elements into the second positon. */
30676 emit_insn (gen_load_even (op0
,
30677 force_reg (inner_mode
,
30681 /* Cast vector to FIRST_IMODE vector. */
30682 ops
[i
] = gen_reg_rtx (first_imode
);
30683 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
30686 /* Interleave low FIRST_IMODE vectors. */
30687 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
30689 op0
= gen_reg_rtx (first_imode
);
30690 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
30692 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
30693 ops
[j
] = gen_reg_rtx (second_imode
);
30694 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
30697 /* Interleave low SECOND_IMODE vectors. */
30698 switch (second_imode
)
30701 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
30703 op0
= gen_reg_rtx (second_imode
);
30704 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
30707 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
30709 ops
[j
] = gen_reg_rtx (third_imode
);
30710 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
30712 second_imode
= V2DImode
;
30713 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
30717 op0
= gen_reg_rtx (second_imode
);
30718 emit_insn (gen_interleave_second_low (op0
, ops
[0],
30721 /* Cast the SECOND_IMODE vector back to a vector on original
30723 emit_insn (gen_rtx_SET (VOIDmode
, target
,
30724 gen_lowpart (mode
, op0
)));
30728 gcc_unreachable ();
30732 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
30733 all values variable, and none identical. */
30736 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
30737 rtx target
, rtx vals
)
30739 rtx ops
[32], op0
, op1
;
30740 enum machine_mode half_mode
= VOIDmode
;
30747 if (!mmx_ok
&& !TARGET_SSE
)
30759 n
= GET_MODE_NUNITS (mode
);
30760 for (i
= 0; i
< n
; i
++)
30761 ops
[i
] = XVECEXP (vals
, 0, i
);
30762 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
30766 half_mode
= V16QImode
;
30770 half_mode
= V8HImode
;
30774 n
= GET_MODE_NUNITS (mode
);
30775 for (i
= 0; i
< n
; i
++)
30776 ops
[i
] = XVECEXP (vals
, 0, i
);
30777 op0
= gen_reg_rtx (half_mode
);
30778 op1
= gen_reg_rtx (half_mode
);
30779 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
30781 ix86_expand_vector_init_interleave (half_mode
, op1
,
30782 &ops
[n
>> 1], n
>> 2);
30783 emit_insn (gen_rtx_SET (VOIDmode
, target
,
30784 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
30788 if (!TARGET_SSE4_1
)
30796 /* Don't use ix86_expand_vector_init_interleave if we can't
30797 move from GPR to SSE register directly. */
30798 if (!TARGET_INTER_UNIT_MOVES
)
30801 n
= GET_MODE_NUNITS (mode
);
30802 for (i
= 0; i
< n
; i
++)
30803 ops
[i
] = XVECEXP (vals
, 0, i
);
30804 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
30812 gcc_unreachable ();
30816 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
30817 enum machine_mode inner_mode
;
30818 rtx words
[4], shift
;
30820 inner_mode
= GET_MODE_INNER (mode
);
30821 n_elts
= GET_MODE_NUNITS (mode
);
30822 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
30823 n_elt_per_word
= n_elts
/ n_words
;
30824 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
30826 for (i
= 0; i
< n_words
; ++i
)
30828 rtx word
= NULL_RTX
;
30830 for (j
= 0; j
< n_elt_per_word
; ++j
)
30832 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
30833 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
30839 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
30840 word
, 1, OPTAB_LIB_WIDEN
);
30841 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
30842 word
, 1, OPTAB_LIB_WIDEN
);
30850 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
30851 else if (n_words
== 2)
30853 rtx tmp
= gen_reg_rtx (mode
);
30854 emit_clobber (tmp
);
30855 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
30856 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
30857 emit_move_insn (target
, tmp
);
30859 else if (n_words
== 4)
30861 rtx tmp
= gen_reg_rtx (V4SImode
);
30862 gcc_assert (word_mode
== SImode
);
30863 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
30864 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
30865 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
30868 gcc_unreachable ();
/* NOTE(review): damaged extraction -- braces, case labels, return
   statements and some declarations are missing from this fragment;
   code is left byte-identical, comments cover visible statements only.  */
30872 /* Initialize vector TARGET via VALS. Suppress the use of MMX
30873 instructions unless MMX_OK is true. */
30876 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
30878 enum machine_mode mode
= GET_MODE (target
);
30879 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
30880 int n_elts
= GET_MODE_NUNITS (mode
);
/* n_var counts non-constant elements; one_var remembers the index of
   the last one found.  */
30881 int n_var
= 0, one_var
= -1;
30882 bool all_same
= true, all_const_zero
= true;
/* Classify every element of VALS: constant vs. variable, zero vs.
   non-zero, and whether all elements equal element 0.  */
30886 for (i
= 0; i
< n_elts
; ++i
)
30888 x
= XVECEXP (vals
, 0, i
);
30889 if (!(CONST_INT_P (x
)
30890 || GET_CODE (x
) == CONST_DOUBLE
30891 || GET_CODE (x
) == CONST_FIXED
))
30892 n_var
++, one_var
= i
;
30893 else if (x
!= CONST0_RTX (inner_mode
))
30894 all_const_zero
= false;
30895 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
30899 /* Constants are best loaded from the constant pool. */
30902 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
30906 /* If all values are identical, broadcast the value. */
30908 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
30909 XVECEXP (vals
, 0, 0)))
30912 /* Values where only one field is non-constant are best loaded from
30913 the pool and overwritten via move later. */
30917 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
30918 XVECEXP (vals
, 0, one_var
),
30922 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
/* Fallback: fully general element-by-element initialization.  */
30926 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
/* NOTE(review): damaged extraction -- the switch statement, case
   labels and braces that select between the strategies below are
   missing from this fragment; code is left byte-identical and the
   comments describe only the visible statements.  */
/* Store scalar VAL into element ELT of vector TARGET, choosing an
   instruction sequence per vector mode (presumably dispatched by the
   missing switch on MODE -- TODO confirm against full source).  */
30930 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
30932 enum machine_mode mode
= GET_MODE (target
);
30933 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
30934 enum machine_mode half_mode
;
30935 bool use_vec_merge
= false;
/* Tables of generators for extracting/inserting the low or high
   128-bit half of a 256-bit AVX vector, indexed by element type.  */
30937 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
30939 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
30940 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
30941 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
30942 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
30943 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
30944 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
30946 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
30948 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
30949 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
30950 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
30951 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
30952 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
30953 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
/* Two-element case: extract the element we keep, then CONCAT it with
   VAL in the order dictated by ELT.  */
30963 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
30964 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
30966 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
30968 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
30969 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
30975 use_vec_merge
= TARGET_SSE4_1
;
30983 /* For the two element vectors, we implement a VEC_CONCAT with
30984 the extraction of the other element. */
30986 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
30987 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
30990 op0
= val
, op1
= tmp
;
30992 op0
= tmp
, op1
= val
;
30994 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
30995 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
31000 use_vec_merge
= TARGET_SSE4_1
;
31007 use_vec_merge
= true;
/* V4SF without SSE4.1: build the result with interleave + shufps,
   recursing to place VAL into lane 0 first.  */
31011 /* tmp = target = A B C D */
31012 tmp
= copy_to_reg (target
);
31013 /* target = A A B B */
31014 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
31015 /* target = X A B B */
31016 ix86_expand_vector_set (false, target
, val
, 0);
31017 /* target = A X C D */
31018 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
31019 const1_rtx
, const0_rtx
,
31020 GEN_INT (2+4), GEN_INT (3+4)));
31024 /* tmp = target = A B C D */
31025 tmp
= copy_to_reg (target
);
31026 /* tmp = X B C D */
31027 ix86_expand_vector_set (false, tmp
, val
, 0);
31028 /* target = A B X D */
31029 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
31030 const0_rtx
, const1_rtx
,
31031 GEN_INT (0+4), GEN_INT (3+4)));
31035 /* tmp = target = A B C D */
31036 tmp
= copy_to_reg (target
);
31037 /* tmp = X B C D */
31038 ix86_expand_vector_set (false, tmp
, val
, 0);
31039 /* target = A B X D */
31040 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
31041 const0_rtx
, const1_rtx
,
31042 GEN_INT (2+4), GEN_INT (0+4)));
31046 gcc_unreachable ();
31051 use_vec_merge
= TARGET_SSE4_1
;
31055 /* Element 0 handled by vec_merge below. */
31058 use_vec_merge
= true;
31064 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31065 store into element 0, then shuffle them back. */
31069 order
[0] = GEN_INT (elt
);
31070 order
[1] = const1_rtx
;
31071 order
[2] = const2_rtx
;
31072 order
[3] = GEN_INT (3);
31073 order
[elt
] = const0_rtx
;
31075 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
31076 order
[1], order
[2], order
[3]));
31078 ix86_expand_vector_set (false, target
, val
, 0);
/* Applying the same swap permutation again restores the original
   element order, now with VAL in lane ELT.  */
31080 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
31081 order
[1], order
[2], order
[3]));
31085 /* For SSE1, we have to reuse the V4SF code. */
31086 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
31087 gen_lowpart (SFmode
, val
), elt
);
31092 use_vec_merge
= TARGET_SSE2
;
31095 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
31099 use_vec_merge
= TARGET_SSE4_1
;
/* 256-bit AVX modes: pick the 128-bit half mode, then extract the
   half containing ELT, set within it, and insert it back.  */
31106 half_mode
= V16QImode
;
31112 half_mode
= V8HImode
;
31118 half_mode
= V4SImode
;
31124 half_mode
= V2DImode
;
31130 half_mode
= V4SFmode
;
31136 half_mode
= V2DFmode
;
31142 /* Compute offset. */
31146 gcc_assert (i
<= 1);
31148 /* Extract the half. */
31149 tmp
= gen_reg_rtx (half_mode
);
31150 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
31152 /* Put val in tmp at elt. */
31153 ix86_expand_vector_set (false, tmp
, val
, elt
);
31156 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
/* vec_merge path: duplicate VAL across all lanes, then merge just
   lane ELT into TARGET via the (1 << elt) lane mask.  */
31165 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
31166 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
31167 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
/* Last resort: bounce the vector through a stack slot and store the
   element with an ordinary memory move.  */
31171 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
31173 emit_move_insn (mem
, target
);
31175 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
31176 emit_move_insn (tmp
, val
);
31178 emit_move_insn (target
, mem
);
/* NOTE(review): damaged extraction -- the switch/case skeleton and
   braces are missing from this fragment; code is left byte-identical,
   comments cover visible statements only.  */
/* Extract element ELT of vector VEC into scalar TARGET, choosing a
   strategy per vector mode (dispatch lines missing from fragment).  */
31183 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
31185 enum machine_mode mode
= GET_MODE (vec
);
31186 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
31187 bool use_vec_extr
= false;
31200 use_vec_extr
= true;
31204 use_vec_extr
= TARGET_SSE4_1
;
/* V4SF: broadcast lane ELT to every lane with shufps ...  */
31216 tmp
= gen_reg_rtx (mode
);
31217 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
31218 GEN_INT (elt
), GEN_INT (elt
),
31219 GEN_INT (elt
+4), GEN_INT (elt
+4)));
/* ... or move the high pair down with an interleave-high.  */
31223 tmp
= gen_reg_rtx (mode
);
31224 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
31228 gcc_unreachable ();
31231 use_vec_extr
= true;
31236 use_vec_extr
= TARGET_SSE4_1
;
/* V4SI: same idea with pshufd / interleave-high.  */
31250 tmp
= gen_reg_rtx (mode
);
31251 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
31252 GEN_INT (elt
), GEN_INT (elt
),
31253 GEN_INT (elt
), GEN_INT (elt
)));
31257 tmp
= gen_reg_rtx (mode
);
31258 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
31262 gcc_unreachable ();
31265 use_vec_extr
= true;
31270 /* For SSE1, we have to reuse the V4SF code. */
31271 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
31272 gen_lowpart (V4SFmode
, vec
), elt
);
31278 use_vec_extr
= TARGET_SSE2
;
31281 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
31285 use_vec_extr
= TARGET_SSE4_1
;
31289 /* ??? Could extract the appropriate HImode element and shift. */
/* vec_select path: select lane ELT directly.  */
31296 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
31297 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
31299 /* Let the rtl optimizers know about the zero extension performed. */
31300 if (inner_mode
== QImode
|| inner_mode
== HImode
)
31302 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
31303 target
= gen_lowpart (SImode
, target
);
31306 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
/* Fallback: spill VEC to a stack slot and load the element back.  */
31310 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
31312 emit_move_insn (mem
, vec
);
31314 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
31315 emit_move_insn (target
, tmp
);
/* NOTE(review): damaged extraction -- braces and some lines missing;
   code left byte-identical, comments cover visible statements only.  */
31319 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31320 pattern to reduce; DEST is the destination; IN is the input vector. */
31323 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
31325 rtx tmp1
, tmp2
, tmp3
;
31327 tmp1
= gen_reg_rtx (V4SFmode
);
31328 tmp2
= gen_reg_rtx (V4SFmode
);
31329 tmp3
= gen_reg_rtx (V4SFmode
);
/* movhlps folds the high pair onto the low pair; FN combines them.  */
31331 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
31332 emit_insn (fn (tmp2
, tmp1
, in
));
/* shufps replicates lane 1 so FN can combine the two partial results
   into the final scalar in every lane of DEST.  */
31334 emit_insn (gen_sse_shufps_v4sf (tmp3
, tmp2
, tmp2
,
31335 const1_rtx
, const1_rtx
,
31336 GEN_INT (1+4), GEN_INT (1+4)));
31337 emit_insn (fn (dest
, tmp2
, tmp3
));
/* NOTE(review): damaged extraction -- return type line, braces and the
   TFmode return are missing; code left byte-identical.  */
31340 /* Target hook for scalar_mode_supported_p. */
31342 ix86_scalar_mode_supported_p (enum machine_mode mode
)
/* Decimal float support is delegated to the generic helper; TFmode is
   special-cased (its result line was dropped from this fragment).  */
31344 if (DECIMAL_FLOAT_MODE_P (mode
))
31345 return default_decimal_float_supported_p ();
31346 else if (mode
== TFmode
)
31349 return default_scalar_mode_supported_p (mode
);
/* NOTE(review): damaged extraction -- the "return true"/"return false"
   lines following each test are missing; code left byte-identical.  */
31352 /* Implements target hook vector_mode_supported_p. */
31354 ix86_vector_mode_supported_p (enum machine_mode mode
)
/* A vector mode is supported when any enabled ISA level can hold it
   in registers (SSE, SSE2, AVX-256, MMX, 3DNow!).  */
31356 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
31358 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
31360 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
31362 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
31364 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
/* NOTE(review): damaged extraction -- the entire body of this function
   (original lines 31372-31380) was dropped; only the signature
   survives.  Presumably it maps a literal suffix character to a
   machine mode -- TODO recover body from full source.  */
31369 /* Target hook for c_mode_for_suffix. */
31370 static enum machine_mode
31371 ix86_c_mode_for_suffix (char suffix
)
/* NOTE(review): damaged extraction -- the third parameter, braces and
   the trailing tree_cons/return lines are missing; code left
   byte-identical.  */
31381 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31383 We do this in the new i386 backend to maintain source compatibility
31384 with the old cc0-based compiler. */
31387 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
31388 tree inputs ATTRIBUTE_UNUSED
,
/* Prepend implicit "flags" and "fpsr" clobbers to the asm's clobber
   list.  */
31391 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
31393 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
/* NOTE(review): damaged extraction -- braces missing; code left
   byte-identical.  */
31398 /* Implements target vector targetm.asm.encode_section_info. This
31399 is not used by netware. */
31401 static void ATTRIBUTE_UNUSED
31402 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
31404 default_encode_section_info (decl
, rtl
, first
);
/* Static/external variables placed in the large data section are
   flagged so addressing code knows they need far addressing.  */
31406 if (TREE_CODE (decl
) == VAR_DECL
31407 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
31408 && ix86_in_large_data_p (decl
))
31409 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
/* NOTE(review): damaged extraction -- return type line and braces
   missing; code left byte-identical.  */
31412 /* Worker function for REVERSE_CONDITION. */
31415 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
/* FP condition-code modes must use the unordered-aware reversal, since
   plain reversal is wrong in the presence of NaNs.  */
31417 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
31418 ? reverse_condition (code
)
31419 : reverse_condition_maybe_unordered (code
));
/* NOTE(review): damaged extraction -- part of the header comment,
   return type, braces and the final return are missing; code left
   byte-identical.  Returns an assembler template string.  */
31422 /* Output code to perform an x87 FP register move, from OPERANDS[1]
31426 output_387_reg_move (rtx insn
, rtx
*operands
)
31428 if (REG_P (operands
[0]))
/* Source register dies here: pop it (ffreep when the destination is
   the stack top itself).  */
31430 if (REG_P (operands
[1])
31431 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
31433 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
31434 return output_387_ffreep (operands
, 0);
31435 return "fstp\t%y0";
31437 if (STACK_TOP_P (operands
[0]))
31438 return "fld%Z1\t%y1";
31441 else if (MEM_P (operands
[0]))
31443 gcc_assert (REG_P (operands
[1]));
31444 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
31445 return "fstp%Z0\t%y0";
31448 /* There is no non-popping store to memory for XFmode.
31449 So if we need one, follow the store with a load. */
31450 if (GET_MODE (operands
[0]) == XFmode
)
31451 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31453 return "fst%Z0\t%y0";
/* NOTE(review): damaged extraction -- braces, the "temp" declaration
   and the else keyword are missing; code left byte-identical.  */
31460 /* Output code to perform a conditional jump to LABEL, if C2 flag in
31461 FP status register is set. */
31464 ix86_emit_fp_unordered_jump (rtx label
)
31466 rtx reg
= gen_reg_rtx (HImode
);
/* Read the x87 status word into REG.  */
31469 emit_insn (gen_x86_fnstsw_1 (reg
))
31471 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
/* sahf path: load status flags into EFLAGS and test UNORDERED.  */
31473 emit_insn (gen_x86_sahf_1 (reg
));
31475 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
31476 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
/* No sahf: test the C2 bit (0x04) of the status word directly.  */
31480 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
31482 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
31483 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
31486 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
31487 gen_rtx_LABEL_REF (VOIDmode
, label
),
31489 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
31491 emit_jump_insn (temp
);
/* Mark the branch as unlikely (10% taken).  */
31492 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
/* NOTE(review): damaged extraction -- braces and the closing argument
   of the GE comparison are missing; code left byte-identical.
   Computes log1p(op1) into op0 on the x87: fyl2xp1 for small |op1|
   (below ~2-sqrt(2)/... threshold), fyl2x on 1+op1 otherwise.  */
31495 /* Output code to perform a log1p XFmode calculation. */
31497 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
31499 rtx label1
= gen_label_rtx ();
31500 rtx label2
= gen_label_rtx ();
31502 rtx tmp
= gen_reg_rtx (XFmode
);
31503 rtx tmp2
= gen_reg_rtx (XFmode
);
/* Branch to label1 when |op1| >= the fyl2xp1 accuracy threshold.  */
31506 emit_insn (gen_absxf2 (tmp
, op1
));
31507 test
= gen_rtx_GE (VOIDmode
, tmp
,
31508 CONST_DOUBLE_FROM_REAL_VALUE (
31509 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
31511 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
/* Small-argument path: op0 = ln2 * log2(1 + op1) via fyl2xp1.  */
31513 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
31514 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
31515 emit_jump (label2
);
/* Large-argument path: op0 = ln2 * log2(op1 + 1) via fyl2x.  */
31517 emit_label (label1
);
31518 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
31519 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
31520 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
31521 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
31523 emit_label (label2
);
/* NOTE(review): damaged extraction -- braces and the UNSPEC tail of
   the rcp estimate are missing; code left byte-identical.  */
31526 /* Output code to perform a Newton-Rhapson approximation of a single precision
31527 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31529 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
31531 rtx x0
, x1
, e0
, e1
, two
;
31533 x0
= gen_reg_rtx (mode
);
31534 e0
= gen_reg_rtx (mode
);
31535 e1
= gen_reg_rtx (mode
);
31536 x1
= gen_reg_rtx (mode
);
/* Constant 2.0, broadcast across the vector when MODE is a vector.  */
31538 two
= CONST_DOUBLE_FROM_REAL_VALUE (dconst2
, SFmode
);
31540 if (VECTOR_MODE_P (mode
))
31541 two
= ix86_build_const_vector (mode
, true, two
);
31543 two
= force_reg (mode
, two
);
31545 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
31547 /* x0 = rcp(b) estimate */
31548 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
31549 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
/* e0 = a * x0 */
31552 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
31553 gen_rtx_MULT (mode
, x0
, a
)));
/* e1 = b * x0 */
31555 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
31556 gen_rtx_MULT (mode
, x0
, b
)));
/* x1 = 2.0 - e1 (one Newton-Raphson correction step) */
31558 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
31559 gen_rtx_MINUS (mode
, two
, e1
)));
31560 /* res = e0 * x1 */
31561 emit_insn (gen_rtx_SET (VOIDmode
, res
,
31562 gen_rtx_MULT (mode
, e0
, x1
)));
/* NOTE(review): damaged extraction -- the final parameter (a recip
   flag, presumably), braces, the REAL_VALUE_TYPE declaration and the
   if/else skeleton selecting sqrt vs. rsqrt are missing; code left
   byte-identical.  */
31565 /* Output code to perform a Newton-Rhapson approximation of a
31566 single precision floating point [reciprocal] square root. */
31568 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
31571 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
31574 x0
= gen_reg_rtx (mode
);
31575 e0
= gen_reg_rtx (mode
);
31576 e1
= gen_reg_rtx (mode
);
31577 e2
= gen_reg_rtx (mode
);
31578 e3
= gen_reg_rtx (mode
);
/* Build the constants -3.0 and -0.5 (vector-broadcast as needed).  */
31580 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
31581 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
31583 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
31584 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
31586 if (VECTOR_MODE_P (mode
))
31588 mthree
= ix86_build_const_vector (mode
, true, mthree
);
31589 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
31592 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
31593 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
31595 /* x0 = rsqrt(a) estimate */
31596 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
31597 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
31600 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
31605 zero
= gen_reg_rtx (mode
);
31606 mask
= gen_reg_rtx (mode
);
/* mask = (0.0 != a); zero x0 where a == 0 so sqrt(0) stays 0.  */
31608 zero
= force_reg (mode
, CONST0_RTX(mode
));
31609 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
31610 gen_rtx_NE (mode
, zero
, a
)));
31612 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
31613 gen_rtx_AND (mode
, x0
, mask
)));
/* e0 = x0 * a */
31617 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
31618 gen_rtx_MULT (mode
, x0
, a
)));
/* e1 = e0 * x0 */
31620 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
31621 gen_rtx_MULT (mode
, e0
, x0
)));
/* e2 = e1 - 3.0 (as e1 + (-3.0)) */
31624 mthree
= force_reg (mode
, mthree
);
31625 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
31626 gen_rtx_PLUS (mode
, e1
, mthree
)));
31628 mhalf
= force_reg (mode
, mhalf
);
/* One of the two e3 assignments below is chosen by the missing
   recip/sqrt conditional -- TODO confirm against full source.  */
31630 /* e3 = -.5 * x0 */
31631 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
31632 gen_rtx_MULT (mode
, x0
, mhalf
)));
31634 /* e3 = -.5 * e0 */
31635 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
31636 gen_rtx_MULT (mode
, e0
, mhalf
)));
31637 /* ret = e2 * e3 */
31638 emit_insn (gen_rtx_SET (VOIDmode
, res
,
31639 gen_rtx_MULT (mode
, e2
, e3
)));
/* NOTE(review): damaged extraction -- the third parameter (tree decl),
   braces, the HAVE_GAS condition head and an early return are missing;
   code left byte-identical.  */
31642 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
31644 static void ATTRIBUTE_UNUSED
31645 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
31648 /* With Binutils 2.15, the "@unwind" marker must be specified on
31649 every occurrence of the ".eh_frame" section, not just the first
31652 && strcmp (name
, ".eh_frame") == 0)
31654 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
31655 flags
& SECTION_WRITE
? "aw" : "a");
/* Everything else goes through the generic ELF section emitter.  */
31658 default_elf_asm_named_section (name
, flags
, decl
);
/* NOTE(review): damaged extraction -- braces, case labels and the
   return "g"/"e"/NULL lines are missing; code left byte-identical.  */
31661 /* Return the mangling of TYPE if it is an extended fundamental type. */
31663 static const char *
31664 ix86_mangle_type (const_tree type
)
31666 type
= TYPE_MAIN_VARIANT (type
);
/* Only void/bool/integer/real fundamental types can be extended; the
   result for other codes (line missing here) is no special mangling.  */
31668 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
31669 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
31672 switch (TYPE_MODE (type
))
31675 /* __float128 is "g". */
31678 /* "long double" or __float80 is "e". */
/* NOTE(review): damaged extraction -- return type line and braces are
   missing; code left byte-identical.  */
31685 /* For 32-bit code we can save PIC register setup by using
31686 __stack_chk_fail_local hidden function instead of calling
31687 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
31688 register, so it is better to call __stack_chk_fail directly. */
31691 ix86_stack_protect_fail (void)
31693 return TARGET_64BIT
31694 ? default_external_stack_protect_fail ()
31695 : default_hidden_stack_protect_fail ();
/* NOTE(review): damaged extraction -- return type, braces and the
   flag_pic/TARGET_64BIT condition heads are missing; code left
   byte-identical.  */
31698 /* Select a format to encode pointers in exception handling data. CODE
31699 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
31700 true if the symbol may be affected by dynamic relocations.
31702 ??? All x86 object file formats are capable of representing this.
31703 After all, the relocation needed is the same as for the call insn.
31704 Whether or not a particular assembler allows us to enter such, I
31705 guess we'll have to see. */
31707 asm_preferred_eh_data_format (int code
, int global
)
/* PIC-style branch (head missing): pc-relative data, 8-byte by
   default, narrowed to 4-byte for small/medium code models.  */
31711 int type
= DW_EH_PE_sdata8
;
31713 || ix86_cmodel
== CM_SMALL_PIC
31714 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
31715 type
= DW_EH_PE_sdata4
;
31716 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
/* Non-PIC: absolute pointers, or 4-byte unsigned when the code model
   guarantees the value fits.  */
31718 if (ix86_cmodel
== CM_SMALL
31719 || (ix86_cmodel
== CM_MEDIUM
&& code
))
31720 return DW_EH_PE_udata4
;
31721 return DW_EH_PE_absptr
;
/* NOTE(review): damaged extraction -- part of the header comment,
   return type, braces and the vmode assignments are missing; code
   left byte-identical.  */
31724 /* Expand copysign from SIGN to the positive value ABS_VALUE
31725 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
31728 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
31730 enum machine_mode mode
= GET_MODE (sign
);
31731 rtx sgn
= gen_reg_rtx (mode
);
/* No caller-provided mask: build a sign-bit mask for the mode.  */
31732 if (mask
== NULL_RTX
)
31734 enum machine_mode vmode
;
31736 if (mode
== SFmode
)
31738 else if (mode
== DFmode
)
31743 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
31744 if (!VECTOR_MODE_P (mode
))
31746 /* We need to generate a scalar mode mask in this case. */
31747 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
31748 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
31749 mask
= gen_reg_rtx (mode
);
31750 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
/* Caller-provided mask is inverted before use (missing else branch).
   sgn = sign & mask; result = abs_value | sgn.  */
31754 mask
= gen_rtx_NOT (mode
, mask
);
31755 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
31756 gen_rtx_AND (mode
, mask
, sign
)));
31757 emit_insn (gen_rtx_SET (VOIDmode
, result
,
31758 gen_rtx_IOR (mode
, abs_value
, sgn
)));
/* NOTE(review): damaged extraction -- return type, braces, the vmode
   assignments and the final "*smask = mask; return xa;" lines are
   missing; code left byte-identical.  */
31761 /* Expand fabs (OP0) and return a new rtx that holds the result. The
31762 mask for masking out the sign-bit is stored in *SMASK, if that is
31765 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
31767 enum machine_mode vmode
, mode
= GET_MODE (op0
);
31770 xa
= gen_reg_rtx (mode
);
31771 if (mode
== SFmode
)
31773 else if (mode
== DFmode
)
/* Build an inverted sign-bit mask (true => mask clears the sign).  */
31777 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
31778 if (!VECTOR_MODE_P (mode
))
31780 /* We need to generate a scalar mode mask in this case. */
31781 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
31782 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
31783 mask
= gen_reg_rtx (mode
);
31784 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
/* xa = op0 & mask clears the sign bit, i.e. fabs.  */
31786 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
31787 gen_rtx_AND (mode
, op0
, mask
)));
/* NOTE(review): damaged extraction -- return type, locals, braces, the
   operand-swap code and the final "return label;" are missing; code
   left byte-identical.  */
31795 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
31796 swapping the operands if SWAP_OPERANDS is true. The expanded
31797 code is a forward jump to a newly created label in case the
31798 comparison is true. The generated label rtx is returned. */
31800 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
31801 bool swap_operands
)
31812 label
= gen_label_rtx ();
/* Emit an unordered FP compare setting FLAGS, then a conditional
   branch on CODE to LABEL.  */
31813 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
31814 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
31815 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
31816 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
31817 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
31818 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
31819 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
31820 JUMP_LABEL (tmp
) = label
;
/* NOTE(review): damaged extraction -- return type, braces, the swap
   code, the else keyword and "return mask;" are missing; code left
   byte-identical.  */
31825 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
31826 using comparison code CODE. Operands are swapped for the comparison if
31827 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
31829 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
31830 bool swap_operands
)
31832 enum machine_mode mode
= GET_MODE (op0
);
31833 rtx mask
= gen_reg_rtx (mode
);
/* cmpsd/cmpss produce an all-ones / all-zeros mask per the predicate.  */
31842 if (mode
== DFmode
)
31843 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
31844 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
31846 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
31847 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
/* NOTE(review): damaged extraction -- return type, braces, the TWO52
   declaration and "return TWO52;" are missing; code left
   byte-identical.  2**52 (DF) / 2**23 (SF) is the magic constant above
   which every representable value is already an integer.  */
31852 /* Generate and return a rtx of mode MODE for 2**n where n is the number
31853 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
31855 ix86_gen_TWO52 (enum machine_mode mode
)
31857 REAL_VALUE_TYPE TWO52r
;
31860 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
31861 TWO52
= const_double_from_real_value (TWO52r
, mode
);
31862 TWO52
= force_reg (mode
, TWO52
);
/* NOTE(review): damaged extraction -- braces and the "adj"
   declaration are missing; code left byte-identical.  */
31867 /* Expand SSE sequence for computing lround from OP1 storing
31870 ix86_expand_lround (rtx op0
, rtx op1
)
31872 /* C code for the stuff we're doing below:
31873 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
31876 enum machine_mode mode
= GET_MODE (op1
);
31877 const struct real_format
*fmt
;
31878 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
/* pred_half = nextafter (0.5, 0.0) = 0.5 - 2**(-p-1); using it instead
   of 0.5 avoids rounding exact halves the wrong way after addition.  */
31881 /* load nextafter (0.5, 0.0) */
31882 fmt
= REAL_MODE_FORMAT (mode
);
31883 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
31884 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
31886 /* adj = copysign (0.5, op1) */
31887 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
31888 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
31890 /* adj = op1 + adj */
31891 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
31893 /* op0 = (imode)adj */
31894 expand_fix (op0
, adj
, 0);
/* NOTE(review): damaged extraction -- braces and part of the
   explanatory C sketch are missing; code left byte-identical.
   Computes lfloor (DO_FLOOR) or lceil of OP1 into integer OP0.  */
31897 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
31900 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
31902 /* C code for the stuff we're doing below (for do_floor):
31904 xi -= (double)xi > op1 ? 1 : 0;
31907 enum machine_mode fmode
= GET_MODE (op1
);
31908 enum machine_mode imode
= GET_MODE (op0
);
31909 rtx ireg
, freg
, label
, tmp
;
31911 /* reg = (long)op1 */
31912 ireg
= gen_reg_rtx (imode
);
31913 expand_fix (ireg
, op1
, 0);
31915 /* freg = (double)reg */
31916 freg
= gen_reg_rtx (fmode
);
31917 expand_float (freg
, ireg
, 0);
/* Truncation rounds toward zero; adjust by +-1 when the round-trip
   value overshoots (floor) or undershoots (ceil) the original.  */
31919 /* ireg = (freg > op1) ? ireg - 1 : ireg */
31920 label
= ix86_expand_sse_compare_and_jump (UNLE
,
31921 freg
, op1
, !do_floor
);
31922 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
31923 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
31924 emit_move_insn (ireg
, tmp
);
31926 emit_label (label
);
31927 LABEL_NUSES (label
) = 1;
31929 emit_move_insn (op0
, ireg
);
/* NOTE(review): damaged extraction -- braces missing; code left
   byte-identical.  Round-to-nearest via the add/subtract-2**52 trick;
   values >= 2**52 are already integral and skip the adjustment.  */
31932 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
31933 result in OPERAND0. */
31935 ix86_expand_rint (rtx operand0
, rtx operand1
)
31937 /* C code for the stuff we're doing below:
31938 xa = fabs (operand1);
31939 if (!isless (xa, 2**52))
31941 xa = xa + 2**52 - 2**52;
31942 return copysign (xa, operand1);
31944 enum machine_mode mode
= GET_MODE (operand0
);
31945 rtx res
, xa
, label
, TWO52
, mask
;
31947 res
= gen_reg_rtx (mode
);
31948 emit_move_insn (res
, operand1
);
31950 /* xa = abs (operand1) */
31951 xa
= ix86_expand_sse_fabs (res
, &mask
);
31953 /* if (!isless (xa, TWO52)) goto label; */
31954 TWO52
= ix86_gen_TWO52 (mode
);
31955 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
/* Adding and subtracting 2**52 forces rounding to an integer in the
   current (nearest) rounding mode.  */
31957 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
31958 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
/* Restore the original sign (keeps -0.0 correct).  */
31960 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
31962 emit_label (label
);
31963 LABEL_NUSES (label
) = 1;
31965 emit_move_insn (operand0
, res
);
/* NOTE(review): damaged extraction -- braces and parts of the C sketch
   are missing; code left byte-identical.  32-bit-safe floor/ceil:
   avoids DImode fix/float by using the 2**52 trick plus a +-1
   compensation.  */
31968 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
31971 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
31973 /* C code for the stuff we expand below.
31974 double xa = fabs (x), x2;
31975 if (!isless (xa, TWO52))
31977 xa = xa + TWO52 - TWO52;
31978 x2 = copysign (xa, x);
31987 enum machine_mode mode
= GET_MODE (operand0
);
31988 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
31990 TWO52
= ix86_gen_TWO52 (mode
);
31992 /* Temporary for holding the result, initialized to the input
31993 operand to ease control flow. */
31994 res
= gen_reg_rtx (mode
);
31995 emit_move_insn (res
, operand1
);
31997 /* xa = abs (operand1) */
31998 xa
= ix86_expand_sse_fabs (res
, &mask
);
32000 /* if (!isless (xa, TWO52)) goto label; */
32001 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
32003 /* xa = xa + TWO52 - TWO52; */
32004 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
32005 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
32007 /* xa = copysign (xa, operand1) */
32008 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
32010 /* generate 1.0 or -1.0 */
32011 one
= force_reg (mode
,
32012 const_double_from_real_value (do_floor
32013 ? dconst1
: dconstm1
, mode
));
/* The mask (all-ones where compensation is needed) ANDed with +-1.0
   yields the exact correction term, with zero elsewhere.  */
32015 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32016 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
32017 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
32018 gen_rtx_AND (mode
, one
, tmp
)));
32019 /* We always need to subtract here to preserve signed zero. */
32020 tmp
= expand_simple_binop (mode
, MINUS
,
32021 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
32022 emit_move_insn (res
, tmp
);
32024 emit_label (label
);
32025 LABEL_NUSES (label
) = 1;
32027 emit_move_insn (operand0
, res
);
/* NOTE(review): damaged extraction -- braces and parts of the C sketch
   are missing; code left byte-identical.  floor/ceil via integer
   truncation plus a +-1 compensation; only valid when the fix/float
   round trip is available (DImode on 64-bit for DFmode).  */
32030 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32033 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
32035 /* C code for the stuff we expand below.
32036 double xa = fabs (x), x2;
32037 if (!isless (xa, TWO52))
32039 x2 = (double)(long)x;
32046 if (HONOR_SIGNED_ZEROS (mode))
32047 return copysign (x2, x);
32050 enum machine_mode mode
= GET_MODE (operand0
);
32051 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
32053 TWO52
= ix86_gen_TWO52 (mode
);
32055 /* Temporary for holding the result, initialized to the input
32056 operand to ease control flow. */
32057 res
= gen_reg_rtx (mode
);
32058 emit_move_insn (res
, operand1
);
32060 /* xa = abs (operand1) */
32061 xa
= ix86_expand_sse_fabs (res
, &mask
);
32063 /* if (!isless (xa, TWO52)) goto label; */
32064 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
32066 /* xa = (double)(long)x */
32067 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
32068 expand_fix (xi
, res
, 0);
32069 expand_float (xa
, xi
, 0);
32072 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
/* Truncation rounds toward zero; subtract (floor) or add (ceil) 1.0
   where the truncated value is on the wrong side of the input.  */
32074 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32075 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
32076 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
32077 gen_rtx_AND (mode
, one
, tmp
)));
32078 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
32079 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
32080 emit_move_insn (res
, tmp
);
32082 if (HONOR_SIGNED_ZEROS (mode
))
32083 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
32085 emit_label (label
);
32086 LABEL_NUSES (label
) = 1;
32088 emit_move_insn (operand0
, res
);
/* NOTE(review): damaged extraction -- braces, parts of the C sketch
   and the trailing argument of the mhalf binop are missing; code left
   byte-identical.  round() (halves away from zero) without DImode
   truncation, for 32-bit targets.  */
32091 /* Expand SSE sequence for computing round from OPERAND1 storing
32092 into OPERAND0. Sequence that works without relying on DImode truncation
32093 via cvttsd2siq that is only available on 64bit targets. */
32095 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
32097 /* C code for the stuff we expand below.
32098 double xa = fabs (x), xa2, x2;
32099 if (!isless (xa, TWO52))
32101 Using the absolute value and copying back sign makes
32102 -0.0 -> -0.0 correct.
32103 xa2 = xa + TWO52 - TWO52;
32108 else if (dxa > 0.5)
32110 x2 = copysign (xa2, x);
32113 enum machine_mode mode
= GET_MODE (operand0
);
32114 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
32116 TWO52
= ix86_gen_TWO52 (mode
);
32118 /* Temporary for holding the result, initialized to the input
32119 operand to ease control flow. */
32120 res
= gen_reg_rtx (mode
);
32121 emit_move_insn (res
, operand1
);
32123 /* xa = abs (operand1) */
32124 xa
= ix86_expand_sse_fabs (res
, &mask
);
32126 /* if (!isless (xa, TWO52)) goto label; */
32127 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
32129 /* xa2 = xa + TWO52 - TWO52; */
32130 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
32131 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
/* dxa is the rounding error of the 2**52 trick; its sign/magnitude
   tells whether nearest-rounding went up or down by a half or more.  */
32133 /* dxa = xa2 - xa; */
32134 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
32136 /* generate 0.5, 1.0 and -0.5 */
32137 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
32138 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
32139 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
32143 tmp
= gen_reg_rtx (mode
);
/* Undo nearest-even rounding and redo it as round-half-away: pull back
   where we overshot by > 0.5, push forward where we undershot by
   >= 0.5.  */
32144 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32145 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
32146 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
32147 gen_rtx_AND (mode
, one
, tmp
)));
32148 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
32149 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32150 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
32151 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
32152 gen_rtx_AND (mode
, one
, tmp
)));
32153 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
32155 /* res = copysign (xa2, operand1) */
32156 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
32158 emit_label (label
);
32159 LABEL_NUSES (label
) = 1;
32161 emit_move_insn (operand0
, res
);
/* NOTE(review): damaged extraction -- braces and part of the header
   comment are missing; code left byte-identical.  trunc() via integer
   fix/float round trip; values >= 2**52 are already integral.  */
32164 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32167 ix86_expand_trunc (rtx operand0
, rtx operand1
)
32169 /* C code for SSE variant we expand below.
32170 double xa = fabs (x), x2;
32171 if (!isless (xa, TWO52))
32173 x2 = (double)(long)x;
32174 if (HONOR_SIGNED_ZEROS (mode))
32175 return copysign (x2, x);
32178 enum machine_mode mode
= GET_MODE (operand0
);
32179 rtx xa
, xi
, TWO52
, label
, res
, mask
;
32181 TWO52
= ix86_gen_TWO52 (mode
);
32183 /* Temporary for holding the result, initialized to the input
32184 operand to ease control flow. */
32185 res
= gen_reg_rtx (mode
);
32186 emit_move_insn (res
, operand1
);
32188 /* xa = abs (operand1) */
32189 xa
= ix86_expand_sse_fabs (res
, &mask
);
32191 /* if (!isless (xa, TWO52)) goto label; */
32192 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
32194 /* x = (double)(long)x */
32195 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
32196 expand_fix (xi
, res
, 0);
32197 expand_float (res
, xi
, 0);
/* Restore the sign so trunc(-0.0) == -0.0.  */
32199 if (HONOR_SIGNED_ZEROS (mode
))
32200 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
32202 emit_label (label
);
32203 LABEL_NUSES (label
) = 1;
32205 emit_move_insn (operand0
, res
);
32208 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32211 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
32213 enum machine_mode mode
= GET_MODE (operand0
);
32214 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
32216 /* C code for SSE variant we expand below.
32217 double xa = fabs (x), x2;
32218 if (!isless (xa, TWO52))
32220 xa2 = xa + TWO52 - TWO52;
32224 x2 = copysign (xa2, x);
32228 TWO52
= ix86_gen_TWO52 (mode
);
32230 /* Temporary for holding the result, initialized to the input
32231 operand to ease control flow. */
32232 res
= gen_reg_rtx (mode
);
32233 emit_move_insn (res
, operand1
);
32235 /* xa = abs (operand1) */
32236 xa
= ix86_expand_sse_fabs (res
, &smask
);
32238 /* if (!isless (xa, TWO52)) goto label; */
32239 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
32241 /* res = xa + TWO52 - TWO52; */
32242 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
32243 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
32244 emit_move_insn (res
, tmp
);
32247 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
32249 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32250 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
32251 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
32252 gen_rtx_AND (mode
, mask
, one
)));
32253 tmp
= expand_simple_binop (mode
, MINUS
,
32254 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
32255 emit_move_insn (res
, tmp
);
32257 /* res = copysign (res, operand1) */
32258 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
32260 emit_label (label
);
32261 LABEL_NUSES (label
) = 1;
32263 emit_move_insn (operand0
, res
);
32266 /* Expand SSE sequence for computing round from OPERAND1 storing
32269 ix86_expand_round (rtx operand0
, rtx operand1
)
32271 /* C code for the stuff we're doing below:
32272 double xa = fabs (x);
32273 if (!isless (xa, TWO52))
32275 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32276 return copysign (xa, x);
32278 enum machine_mode mode
= GET_MODE (operand0
);
32279 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
32280 const struct real_format
*fmt
;
32281 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
32283 /* Temporary for holding the result, initialized to the input
32284 operand to ease control flow. */
32285 res
= gen_reg_rtx (mode
);
32286 emit_move_insn (res
, operand1
);
32288 TWO52
= ix86_gen_TWO52 (mode
);
32289 xa
= ix86_expand_sse_fabs (res
, &mask
);
32290 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
32292 /* load nextafter (0.5, 0.0) */
32293 fmt
= REAL_MODE_FORMAT (mode
);
32294 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
32295 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
32297 /* xa = xa + 0.5 */
32298 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
32299 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
32301 /* xa = (double)(int64_t)xa */
32302 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
32303 expand_fix (xi
, xa
, 0);
32304 expand_float (xa
, xi
, 0);
32306 /* res = copysign (xa, operand1) */
32307 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
32309 emit_label (label
);
32310 LABEL_NUSES (label
) = 1;
32312 emit_move_insn (operand0
, res
);
32316 /* Table of valid machine attributes. */
32317 static const struct attribute_spec ix86_attribute_table
[] =
32319 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
32320 /* Stdcall attribute says callee is responsible for popping arguments
32321 if they are not variable. */
32322 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
32323 /* Fastcall attribute says callee is responsible for popping arguments
32324 if they are not variable. */
32325 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
32326 /* Thiscall attribute says callee is responsible for popping arguments
32327 if they are not variable. */
32328 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
32329 /* Cdecl attribute says the callee is a normal C declaration */
32330 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
32331 /* Regparm attribute specifies how many integer arguments are to be
32332 passed in registers. */
32333 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
32334 /* Sseregparm attribute says we are using x86_64 calling conventions
32335 for FP arguments. */
32336 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
32337 /* force_align_arg_pointer says this function realigns the stack at entry. */
32338 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
32339 false, true, true, ix86_handle_cconv_attribute
},
32340 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32341 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
32342 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
32343 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
32345 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
32346 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
32347 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32348 SUBTARGET_ATTRIBUTE_TABLE
,
32350 /* ms_abi and sysv_abi calling convention function attributes. */
32351 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
},
32352 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
},
32353 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
},
32354 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32355 ix86_handle_callee_pop_aggregate_return
},
32357 { NULL
, 0, 0, false, false, false, NULL
}
32360 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32362 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
32363 tree vectype ATTRIBUTE_UNUSED
,
32364 int misalign ATTRIBUTE_UNUSED
)
32366 switch (type_of_cost
)
32369 return ix86_cost
->scalar_stmt_cost
;
32372 return ix86_cost
->scalar_load_cost
;
32375 return ix86_cost
->scalar_store_cost
;
32378 return ix86_cost
->vec_stmt_cost
;
32381 return ix86_cost
->vec_align_load_cost
;
32384 return ix86_cost
->vec_store_cost
;
32386 case vec_to_scalar
:
32387 return ix86_cost
->vec_to_scalar_cost
;
32389 case scalar_to_vec
:
32390 return ix86_cost
->scalar_to_vec_cost
;
32392 case unaligned_load
:
32393 case unaligned_store
:
32394 return ix86_cost
->vec_unalign_load_cost
;
32396 case cond_branch_taken
:
32397 return ix86_cost
->cond_taken_branch_cost
;
32399 case cond_branch_not_taken
:
32400 return ix86_cost
->cond_not_taken_branch_cost
;
32406 gcc_unreachable ();
32411 /* Implement targetm.vectorize.builtin_vec_perm. */
32414 ix86_vectorize_builtin_vec_perm (tree vec_type
, tree
*mask_type
)
32416 tree itype
= TREE_TYPE (vec_type
);
32417 bool u
= TYPE_UNSIGNED (itype
);
32418 enum machine_mode vmode
= TYPE_MODE (vec_type
);
32419 enum ix86_builtins fcode
;
32420 bool ok
= TARGET_SSE2
;
32426 fcode
= IX86_BUILTIN_VEC_PERM_V4DF
;
32429 fcode
= IX86_BUILTIN_VEC_PERM_V2DF
;
32431 itype
= ix86_get_builtin_type (IX86_BT_DI
);
32436 fcode
= IX86_BUILTIN_VEC_PERM_V8SF
;
32440 fcode
= IX86_BUILTIN_VEC_PERM_V4SF
;
32442 itype
= ix86_get_builtin_type (IX86_BT_SI
);
32446 fcode
= u
? IX86_BUILTIN_VEC_PERM_V2DI_U
: IX86_BUILTIN_VEC_PERM_V2DI
;
32449 fcode
= u
? IX86_BUILTIN_VEC_PERM_V4SI_U
: IX86_BUILTIN_VEC_PERM_V4SI
;
32452 fcode
= u
? IX86_BUILTIN_VEC_PERM_V8HI_U
: IX86_BUILTIN_VEC_PERM_V8HI
;
32455 fcode
= u
? IX86_BUILTIN_VEC_PERM_V16QI_U
: IX86_BUILTIN_VEC_PERM_V16QI
;
32465 *mask_type
= itype
;
32466 return ix86_builtins
[(int) fcode
];
32469 /* Return a vector mode with twice as many elements as VMODE. */
32470 /* ??? Consider moving this to a table generated by genmodes.c. */
32472 static enum machine_mode
32473 doublesize_vector_mode (enum machine_mode vmode
)
32477 case V2SFmode
: return V4SFmode
;
32478 case V1DImode
: return V2DImode
;
32479 case V2SImode
: return V4SImode
;
32480 case V4HImode
: return V8HImode
;
32481 case V8QImode
: return V16QImode
;
32483 case V2DFmode
: return V4DFmode
;
32484 case V4SFmode
: return V8SFmode
;
32485 case V2DImode
: return V4DImode
;
32486 case V4SImode
: return V8SImode
;
32487 case V8HImode
: return V16HImode
;
32488 case V16QImode
: return V32QImode
;
32490 case V4DFmode
: return V8DFmode
;
32491 case V8SFmode
: return V16SFmode
;
32492 case V4DImode
: return V8DImode
;
32493 case V8SImode
: return V16SImode
;
32494 case V16HImode
: return V32HImode
;
32495 case V32QImode
: return V64QImode
;
32498 gcc_unreachable ();
32502 /* Construct (set target (vec_select op0 (parallel perm))) and
32503 return true if that's a valid instruction in the active ISA. */
32506 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
, unsigned nelt
)
32508 rtx rperm
[MAX_VECT_LEN
], x
;
32511 for (i
= 0; i
< nelt
; ++i
)
32512 rperm
[i
] = GEN_INT (perm
[i
]);
32514 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nelt
, rperm
));
32515 x
= gen_rtx_VEC_SELECT (GET_MODE (target
), op0
, x
);
32516 x
= gen_rtx_SET (VOIDmode
, target
, x
);
32519 if (recog_memoized (x
) < 0)
32527 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32530 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
32531 const unsigned char *perm
, unsigned nelt
)
32533 enum machine_mode v2mode
;
32536 v2mode
= doublesize_vector_mode (GET_MODE (op0
));
32537 x
= gen_rtx_VEC_CONCAT (v2mode
, op0
, op1
);
32538 return expand_vselect (target
, x
, perm
, nelt
);
32541 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32542 in terms of blendp[sd] / pblendw / pblendvb. */
32545 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
32547 enum machine_mode vmode
= d
->vmode
;
32548 unsigned i
, mask
, nelt
= d
->nelt
;
32549 rtx target
, op0
, op1
, x
;
32551 if (!TARGET_SSE4_1
|| d
->op0
== d
->op1
)
32553 if (!(GET_MODE_SIZE (vmode
) == 16 || vmode
== V4DFmode
|| vmode
== V8SFmode
))
32556 /* This is a blend, not a permute. Elements must stay in their
32557 respective lanes. */
32558 for (i
= 0; i
< nelt
; ++i
)
32560 unsigned e
= d
->perm
[i
];
32561 if (!(e
== i
|| e
== i
+ nelt
))
32568 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
32569 decision should be extracted elsewhere, so that we only try that
32570 sequence once all budget==3 options have been tried. */
32572 /* For bytes, see if bytes move in pairs so we can use pblendw with
32573 an immediate argument, rather than pblendvb with a vector argument. */
32574 if (vmode
== V16QImode
)
32576 bool pblendw_ok
= true;
32577 for (i
= 0; i
< 16 && pblendw_ok
; i
+= 2)
32578 pblendw_ok
= (d
->perm
[i
] + 1 == d
->perm
[i
+ 1]);
32582 rtx rperm
[16], vperm
;
32584 for (i
= 0; i
< nelt
; ++i
)
32585 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
32587 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
));
32588 vperm
= force_reg (V16QImode
, vperm
);
32590 emit_insn (gen_sse4_1_pblendvb (d
->target
, d
->op0
, d
->op1
, vperm
));
32595 target
= d
->target
;
32607 for (i
= 0; i
< nelt
; ++i
)
32608 mask
|= (d
->perm
[i
] >= nelt
) << i
;
32612 for (i
= 0; i
< 2; ++i
)
32613 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
32617 for (i
= 0; i
< 4; ++i
)
32618 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
32622 for (i
= 0; i
< 8; ++i
)
32623 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
32627 target
= gen_lowpart (vmode
, target
);
32628 op0
= gen_lowpart (vmode
, op0
);
32629 op1
= gen_lowpart (vmode
, op1
);
32633 gcc_unreachable ();
32636 /* This matches five different patterns with the different modes. */
32637 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
32638 x
= gen_rtx_SET (VOIDmode
, target
, x
);
32644 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32645 in terms of the variable form of vpermilps.
32647 Note that we will have already failed the immediate input vpermilps,
32648 which requires that the high and low part shuffle be identical; the
32649 variable form doesn't require that. */
32652 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
32654 rtx rperm
[8], vperm
;
32657 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| d
->op0
!= d
->op1
)
32660 /* We can only permute within the 128-bit lane. */
32661 for (i
= 0; i
< 8; ++i
)
32663 unsigned e
= d
->perm
[i
];
32664 if (i
< 4 ? e
>= 4 : e
< 4)
32671 for (i
= 0; i
< 8; ++i
)
32673 unsigned e
= d
->perm
[i
];
32675 /* Within each 128-bit lane, the elements of op0 are numbered
32676 from 0 and the elements of op1 are numbered from 4. */
32682 rperm
[i
] = GEN_INT (e
);
32685 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
32686 vperm
= force_reg (V8SImode
, vperm
);
32687 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
32692 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32693 in terms of pshufb or vpperm. */
32696 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
32698 unsigned i
, nelt
, eltsz
;
32699 rtx rperm
[16], vperm
, target
, op0
, op1
;
32701 if (!(d
->op0
== d
->op1
? TARGET_SSSE3
: TARGET_XOP
))
32703 if (GET_MODE_SIZE (d
->vmode
) != 16)
32710 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
32712 for (i
= 0; i
< nelt
; ++i
)
32714 unsigned j
, e
= d
->perm
[i
];
32715 for (j
= 0; j
< eltsz
; ++j
)
32716 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
32719 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
));
32720 vperm
= force_reg (V16QImode
, vperm
);
32722 target
= gen_lowpart (V16QImode
, d
->target
);
32723 op0
= gen_lowpart (V16QImode
, d
->op0
);
32724 if (d
->op0
== d
->op1
)
32725 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
32728 op1
= gen_lowpart (V16QImode
, d
->op1
);
32729 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
32735 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
32736 in a single instruction. */
32739 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
32741 unsigned i
, nelt
= d
->nelt
;
32742 unsigned char perm2
[MAX_VECT_LEN
];
32744 /* Check plain VEC_SELECT first, because AVX has instructions that could
32745 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
32746 input where SEL+CONCAT may not. */
32747 if (d
->op0
== d
->op1
)
32749 int mask
= nelt
- 1;
32751 for (i
= 0; i
< nelt
; i
++)
32752 perm2
[i
] = d
->perm
[i
] & mask
;
32754 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
))
32757 /* There are plenty of patterns in sse.md that are written for
32758 SEL+CONCAT and are not replicated for a single op. Perhaps
32759 that should be changed, to avoid the nastiness here. */
32761 /* Recognize interleave style patterns, which means incrementing
32762 every other permutation operand. */
32763 for (i
= 0; i
< nelt
; i
+= 2)
32765 perm2
[i
] = d
->perm
[i
] & mask
;
32766 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
32768 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
))
32771 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
32774 for (i
= 0; i
< nelt
; i
+= 4)
32776 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
32777 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
32778 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
32779 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
32782 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
))
32787 /* Finally, try the fully general two operand permute. */
32788 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
))
32791 /* Recognize interleave style patterns with reversed operands. */
32792 if (d
->op0
!= d
->op1
)
32794 for (i
= 0; i
< nelt
; ++i
)
32796 unsigned e
= d
->perm
[i
];
32804 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
))
32808 /* Try the SSE4.1 blend variable merge instructions. */
32809 if (expand_vec_perm_blend (d
))
32812 /* Try one of the AVX vpermil variable permutations. */
32813 if (expand_vec_perm_vpermil (d
))
32816 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
32817 if (expand_vec_perm_pshufb (d
))
32823 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32824 in terms of a pair of pshuflw + pshufhw instructions. */
32827 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
32829 unsigned char perm2
[MAX_VECT_LEN
];
32833 if (d
->vmode
!= V8HImode
|| d
->op0
!= d
->op1
)
32836 /* The two permutations only operate in 64-bit lanes. */
32837 for (i
= 0; i
< 4; ++i
)
32838 if (d
->perm
[i
] >= 4)
32840 for (i
= 4; i
< 8; ++i
)
32841 if (d
->perm
[i
] < 4)
32847 /* Emit the pshuflw. */
32848 memcpy (perm2
, d
->perm
, 4);
32849 for (i
= 4; i
< 8; ++i
)
32851 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8);
32854 /* Emit the pshufhw. */
32855 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
32856 for (i
= 0; i
< 4; ++i
)
32858 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8);
32864 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32865 the permutation using the SSSE3 palignr instruction. This succeeds
32866 when all of the elements in PERM fit within one vector and we merely
32867 need to shift them down so that a single vector permutation has a
32868 chance to succeed. */
32871 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
32873 unsigned i
, nelt
= d
->nelt
;
32878 /* Even with AVX, palignr only operates on 128-bit vectors. */
32879 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
32882 min
= nelt
, max
= 0;
32883 for (i
= 0; i
< nelt
; ++i
)
32885 unsigned e
= d
->perm
[i
];
32891 if (min
== 0 || max
- min
>= nelt
)
32894 /* Given that we have SSSE3, we know we'll be able to implement the
32895 single operand permutation after the palignr with pshufb. */
32899 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
32900 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode
, d
->target
),
32901 gen_lowpart (TImode
, d
->op1
),
32902 gen_lowpart (TImode
, d
->op0
), shift
));
32904 d
->op0
= d
->op1
= d
->target
;
32907 for (i
= 0; i
< nelt
; ++i
)
32909 unsigned e
= d
->perm
[i
] - min
;
32915 /* Test for the degenerate case where the alignment by itself
32916 produces the desired permutation. */
32920 ok
= expand_vec_perm_1 (d
);
32926 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32927 a two vector permutation into a single vector permutation by using
32928 an interleave operation to merge the vectors. */
32931 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
32933 struct expand_vec_perm_d dremap
, dfinal
;
32934 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
32935 unsigned contents
, h1
, h2
, h3
, h4
;
32936 unsigned char remap
[2 * MAX_VECT_LEN
];
32940 if (d
->op0
== d
->op1
)
32943 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
32944 lanes. We can use similar techniques with the vperm2f128 instruction,
32945 but it requires slightly different logic. */
32946 if (GET_MODE_SIZE (d
->vmode
) != 16)
32949 /* Examine from whence the elements come. */
32951 for (i
= 0; i
< nelt
; ++i
)
32952 contents
|= 1u << d
->perm
[i
];
32954 /* Split the two input vectors into 4 halves. */
32955 h1
= (1u << nelt2
) - 1;
32960 memset (remap
, 0xff, sizeof (remap
));
32963 /* If the elements from the low halves use interleave low, and similarly
32964 for interleave high. If the elements are from mis-matched halves, we
32965 can use shufps for V4SF/V4SI or do a DImode shuffle. */
32966 if ((contents
& (h1
| h3
)) == contents
)
32968 for (i
= 0; i
< nelt2
; ++i
)
32971 remap
[i
+ nelt
] = i
* 2 + 1;
32972 dremap
.perm
[i
* 2] = i
;
32973 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
32976 else if ((contents
& (h2
| h4
)) == contents
)
32978 for (i
= 0; i
< nelt2
; ++i
)
32980 remap
[i
+ nelt2
] = i
* 2;
32981 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
32982 dremap
.perm
[i
* 2] = i
+ nelt2
;
32983 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
32986 else if ((contents
& (h1
| h4
)) == contents
)
32988 for (i
= 0; i
< nelt2
; ++i
)
32991 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
32992 dremap
.perm
[i
] = i
;
32993 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
32997 dremap
.vmode
= V2DImode
;
32999 dremap
.perm
[0] = 0;
33000 dremap
.perm
[1] = 3;
33003 else if ((contents
& (h2
| h3
)) == contents
)
33005 for (i
= 0; i
< nelt2
; ++i
)
33007 remap
[i
+ nelt2
] = i
;
33008 remap
[i
+ nelt
] = i
+ nelt2
;
33009 dremap
.perm
[i
] = i
+ nelt2
;
33010 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
33014 dremap
.vmode
= V2DImode
;
33016 dremap
.perm
[0] = 1;
33017 dremap
.perm
[1] = 2;
33023 /* Use the remapping array set up above to move the elements from their
33024 swizzled locations into their final destinations. */
33026 for (i
= 0; i
< nelt
; ++i
)
33028 unsigned e
= remap
[d
->perm
[i
]];
33029 gcc_assert (e
< nelt
);
33030 dfinal
.perm
[i
] = e
;
33032 dfinal
.op0
= gen_reg_rtx (dfinal
.vmode
);
33033 dfinal
.op1
= dfinal
.op0
;
33034 dremap
.target
= dfinal
.op0
;
33036 /* Test if the final remap can be done with a single insn. For V4SFmode or
33037 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33039 ok
= expand_vec_perm_1 (&dfinal
);
33040 seq
= get_insns ();
33046 if (dremap
.vmode
!= dfinal
.vmode
)
33048 dremap
.target
= gen_lowpart (dremap
.vmode
, dremap
.target
);
33049 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
33050 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
33053 ok
= expand_vec_perm_1 (&dremap
);
33060 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33061 permutation with two pshufb insns and an ior. We should have already
33062 failed all two instruction sequences. */
33065 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
33067 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
33068 unsigned int i
, nelt
, eltsz
;
33070 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
33072 gcc_assert (d
->op0
!= d
->op1
);
33075 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
33077 /* Generate two permutation masks. If the required element is within
33078 the given vector it is shuffled into the proper lane. If the required
33079 element is in the other vector, force a zero into the lane by setting
33080 bit 7 in the permutation mask. */
33081 m128
= GEN_INT (-128);
33082 for (i
= 0; i
< nelt
; ++i
)
33084 unsigned j
, e
= d
->perm
[i
];
33085 unsigned which
= (e
>= nelt
);
33089 for (j
= 0; j
< eltsz
; ++j
)
33091 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
33092 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
33096 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
33097 vperm
= force_reg (V16QImode
, vperm
);
33099 l
= gen_reg_rtx (V16QImode
);
33100 op
= gen_lowpart (V16QImode
, d
->op0
);
33101 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
33103 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
33104 vperm
= force_reg (V16QImode
, vperm
);
33106 h
= gen_reg_rtx (V16QImode
);
33107 op
= gen_lowpart (V16QImode
, d
->op1
);
33108 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
33110 op
= gen_lowpart (V16QImode
, d
->target
);
33111 emit_insn (gen_iorv16qi3 (op
, l
, h
));
33116 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33117 and extract-odd permutations. */
33120 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
33127 t1
= gen_reg_rtx (V4DFmode
);
33128 t2
= gen_reg_rtx (V4DFmode
);
33130 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33131 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
33132 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
33134 /* Now an unpck[lh]pd will produce the result required. */
33136 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
33138 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
33144 int mask
= odd
? 0xdd : 0x88;
33146 t1
= gen_reg_rtx (V8SFmode
);
33147 t2
= gen_reg_rtx (V8SFmode
);
33148 t3
= gen_reg_rtx (V8SFmode
);
33150 /* Shuffle within the 128-bit lanes to produce:
33151 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33152 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
33155 /* Shuffle the lanes around to produce:
33156 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33157 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
33160 /* Shuffle within the 128-bit lanes to produce:
33161 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33162 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
33164 /* Shuffle within the 128-bit lanes to produce:
33165 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33166 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
33168 /* Shuffle the lanes around to produce:
33169 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33170 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
33179 /* These are always directly implementable by expand_vec_perm_1. */
33180 gcc_unreachable ();
33184 return expand_vec_perm_pshufb2 (d
);
33187 /* We need 2*log2(N)-1 operations to achieve odd/even
33188 with interleave. */
33189 t1
= gen_reg_rtx (V8HImode
);
33190 t2
= gen_reg_rtx (V8HImode
);
33191 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
33192 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
33193 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
33194 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
33196 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
33198 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
33205 return expand_vec_perm_pshufb2 (d
);
33208 t1
= gen_reg_rtx (V16QImode
);
33209 t2
= gen_reg_rtx (V16QImode
);
33210 t3
= gen_reg_rtx (V16QImode
);
33211 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
33212 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
33213 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
33214 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
33215 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
33216 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
33218 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
33220 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
33226 gcc_unreachable ();
33232 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33233 extract-even and extract-odd permutations. */
33236 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
33238 unsigned i
, odd
, nelt
= d
->nelt
;
33241 if (odd
!= 0 && odd
!= 1)
33244 for (i
= 1; i
< nelt
; ++i
)
33245 if (d
->perm
[i
] != 2 * i
+ odd
)
33248 return expand_vec_perm_even_odd_1 (d
, odd
);
33251 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33252 permutations. We assume that expand_vec_perm_1 has already failed. */
33255 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
33257 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
33258 enum machine_mode vmode
= d
->vmode
;
33259 unsigned char perm2
[4];
33267 /* These are special-cased in sse.md so that we can optionally
33268 use the vbroadcast instruction. They expand to two insns
33269 if the input happens to be in a register. */
33270 gcc_unreachable ();
33276 /* These are always implementable using standard shuffle patterns. */
33277 gcc_unreachable ();
33281 /* These can be implemented via interleave. We save one insn by
33282 stopping once we have promoted to V4SImode and then use pshufd. */
33285 optab otab
= vec_interleave_low_optab
;
33289 otab
= vec_interleave_high_optab
;
33294 op0
= expand_binop (vmode
, otab
, op0
, op0
, NULL
, 0, OPTAB_DIRECT
);
33295 vmode
= get_mode_wider_vector (vmode
);
33296 op0
= gen_lowpart (vmode
, op0
);
33298 while (vmode
!= V4SImode
);
33300 memset (perm2
, elt
, 4);
33301 ok
= expand_vselect (gen_lowpart (V4SImode
, d
->target
), op0
, perm2
, 4);
33306 gcc_unreachable ();
33310 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33311 broadcast permutations. */
33314 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
33316 unsigned i
, elt
, nelt
= d
->nelt
;
33318 if (d
->op0
!= d
->op1
)
33322 for (i
= 1; i
< nelt
; ++i
)
33323 if (d
->perm
[i
] != elt
)
33326 return expand_vec_perm_broadcast_1 (d
);
33329 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33330 With all of the interface bits taken care of, perform the expansion
33331 in D and return true on success. */
33334 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d
*d
)
33336 /* Try a single instruction expansion. */
33337 if (expand_vec_perm_1 (d
))
33340 /* Try sequences of two instructions. */
33342 if (expand_vec_perm_pshuflw_pshufhw (d
))
33345 if (expand_vec_perm_palignr (d
))
33348 if (expand_vec_perm_interleave2 (d
))
33351 if (expand_vec_perm_broadcast (d
))
33354 /* Try sequences of three instructions. */
33356 if (expand_vec_perm_pshufb2 (d
))
33359 /* ??? Look for narrow permutations whose element orderings would
33360 allow the promotion to a wider mode. */
33362 /* ??? Look for sequences of interleave or a wider permute that place
33363 the data into the correct lanes for a half-vector shuffle like
33364 pshuf[lh]w or vpermilps. */
33366 /* ??? Look for sequences of interleave that produce the desired results.
33367 The combinatorics of punpck[lh] get pretty ugly... */
33369 if (expand_vec_perm_even_odd (d
))
33375 /* Extract the values from the vector CST into the permutation array in D.
33376 Return 0 on error, 1 if all values from the permutation come from the
33377 first vector, 2 if all values from the second vector, and 3 otherwise. */
33380 extract_vec_perm_cst (struct expand_vec_perm_d
*d
, tree cst
)
33382 tree list
= TREE_VECTOR_CST_ELTS (cst
);
33383 unsigned i
, nelt
= d
->nelt
;
33386 for (i
= 0; i
< nelt
; ++i
, list
= TREE_CHAIN (list
))
33388 unsigned HOST_WIDE_INT e
;
33390 if (!host_integerp (TREE_VALUE (list
), 1))
33392 e
= tree_low_cst (TREE_VALUE (list
), 1);
33396 ret
|= (e
< nelt
? 1 : 2);
33399 gcc_assert (list
== NULL
);
33401 /* For all elements from second vector, fold the elements to first. */
33403 for (i
= 0; i
< nelt
; ++i
)
33404 d
->perm
[i
] -= nelt
;
33410 ix86_expand_vec_perm_builtin (tree exp
)
33412 struct expand_vec_perm_d d
;
33413 tree arg0
, arg1
, arg2
;
33415 arg0
= CALL_EXPR_ARG (exp
, 0);
33416 arg1
= CALL_EXPR_ARG (exp
, 1);
33417 arg2
= CALL_EXPR_ARG (exp
, 2);
33419 d
.vmode
= TYPE_MODE (TREE_TYPE (arg0
));
33420 d
.nelt
= GET_MODE_NUNITS (d
.vmode
);
33421 d
.testing_p
= false;
33422 gcc_assert (VECTOR_MODE_P (d
.vmode
));
33424 if (TREE_CODE (arg2
) != VECTOR_CST
)
33426 error_at (EXPR_LOCATION (exp
),
33427 "vector permutation requires vector constant");
33431 switch (extract_vec_perm_cst (&d
, arg2
))
33437 error_at (EXPR_LOCATION (exp
), "invalid vector permutation constant");
33441 if (!operand_equal_p (arg0
, arg1
, 0))
33443 d
.op0
= expand_expr (arg0
, NULL_RTX
, d
.vmode
, EXPAND_NORMAL
);
33444 d
.op0
= force_reg (d
.vmode
, d
.op0
);
33445 d
.op1
= expand_expr (arg1
, NULL_RTX
, d
.vmode
, EXPAND_NORMAL
);
33446 d
.op1
= force_reg (d
.vmode
, d
.op1
);
33450 /* The elements of PERM do not suggest that only the first operand
33451 is used, but both operands are identical. Allow easier matching
33452 of the permutation by folding the permutation into the single
33455 unsigned i
, nelt
= d
.nelt
;
33456 for (i
= 0; i
< nelt
; ++i
)
33457 if (d
.perm
[i
] >= nelt
)
33463 d
.op0
= expand_expr (arg0
, NULL_RTX
, d
.vmode
, EXPAND_NORMAL
);
33464 d
.op0
= force_reg (d
.vmode
, d
.op0
);
33469 d
.op0
= expand_expr (arg1
, NULL_RTX
, d
.vmode
, EXPAND_NORMAL
);
33470 d
.op0
= force_reg (d
.vmode
, d
.op0
);
33475 d
.target
= gen_reg_rtx (d
.vmode
);
33476 if (ix86_expand_vec_perm_builtin_1 (&d
))
33479 /* For compiler generated permutations, we should never got here, because
33480 the compiler should also be checking the ok hook. But since this is a
33481 builtin the user has access too, so don't abort. */
33485 sorry ("vector permutation (%d %d)", d
.perm
[0], d
.perm
[1]);
33488 sorry ("vector permutation (%d %d %d %d)",
33489 d
.perm
[0], d
.perm
[1], d
.perm
[2], d
.perm
[3]);
33492 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33493 d
.perm
[0], d
.perm
[1], d
.perm
[2], d
.perm
[3],
33494 d
.perm
[4], d
.perm
[5], d
.perm
[6], d
.perm
[7]);
33497 sorry ("vector permutation "
33498 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33499 d
.perm
[0], d
.perm
[1], d
.perm
[2], d
.perm
[3],
33500 d
.perm
[4], d
.perm
[5], d
.perm
[6], d
.perm
[7],
33501 d
.perm
[8], d
.perm
[9], d
.perm
[10], d
.perm
[11],
33502 d
.perm
[12], d
.perm
[13], d
.perm
[14], d
.perm
[15]);
33505 gcc_unreachable ();
33508 return CONST0_RTX (d
.vmode
);
33511 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33514 ix86_vectorize_builtin_vec_perm_ok (tree vec_type
, tree mask
)
33516 struct expand_vec_perm_d d
;
33520 d
.vmode
= TYPE_MODE (vec_type
);
33521 d
.nelt
= GET_MODE_NUNITS (d
.vmode
);
33522 d
.testing_p
= true;
33524 /* Given sufficient ISA support we can just return true here
33525 for selected vector modes. */
33526 if (GET_MODE_SIZE (d
.vmode
) == 16)
33528 /* All implementable with a single vpperm insn. */
33531 /* All implementable with 2 pshufb + 1 ior. */
33534 /* All implementable with shufpd or unpck[lh]pd. */
33539 vec_mask
= extract_vec_perm_cst (&d
, mask
);
33541 /* This hook is cannot be called in response to something that the
33542 user does (unlike the builtin expander) so we shouldn't ever see
33543 an error generated from the extract. */
33544 gcc_assert (vec_mask
> 0 && vec_mask
<= 3);
33545 one_vec
= (vec_mask
!= 3);
33547 /* Implementable with shufps or pshufd. */
33548 if (one_vec
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
33551 /* Otherwise we have to go through the motions and see if we can
33552 figure out how to generate the requested permutation. */
33553 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
33554 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
33556 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
33559 ret
= ix86_expand_vec_perm_builtin_1 (&d
);
33566 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
33568 struct expand_vec_perm_d d
;
33574 d
.vmode
= GET_MODE (targ
);
33575 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
33576 d
.testing_p
= false;
33578 for (i
= 0; i
< nelt
; ++i
)
33579 d
.perm
[i
] = i
* 2 + odd
;
33581 /* We'll either be able to implement the permutation directly... */
33582 if (expand_vec_perm_1 (&d
))
33585 /* ... or we use the special-case patterns. */
33586 expand_vec_perm_even_odd_1 (&d
, odd
);
33589 /* This function returns the calling abi specific va_list type node.
33590 It returns the FNDECL specific va_list type. */
33593 ix86_fn_abi_va_list (tree fndecl
)
33596 return va_list_type_node
;
33597 gcc_assert (fndecl
!= NULL_TREE
);
33599 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
33600 return ms_va_list_type_node
;
33602 return sysv_va_list_type_node
;
33605 /* Returns the canonical va_list type specified by TYPE. If there
33606 is no valid TYPE provided, it return NULL_TREE. */
33609 ix86_canonical_va_list_type (tree type
)
33613 /* Resolve references and pointers to va_list type. */
33614 if (TREE_CODE (type
) == MEM_REF
)
33615 type
= TREE_TYPE (type
);
33616 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
33617 type
= TREE_TYPE (type
);
33618 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
33619 type
= TREE_TYPE (type
);
33623 wtype
= va_list_type_node
;
33624 gcc_assert (wtype
!= NULL_TREE
);
33626 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
33628 /* If va_list is an array type, the argument may have decayed
33629 to a pointer type, e.g. by being passed to another function.
33630 In that case, unwrap both types so that we can compare the
33631 underlying records. */
33632 if (TREE_CODE (htype
) == ARRAY_TYPE
33633 || POINTER_TYPE_P (htype
))
33635 wtype
= TREE_TYPE (wtype
);
33636 htype
= TREE_TYPE (htype
);
33639 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
33640 return va_list_type_node
;
33641 wtype
= sysv_va_list_type_node
;
33642 gcc_assert (wtype
!= NULL_TREE
);
33644 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
33646 /* If va_list is an array type, the argument may have decayed
33647 to a pointer type, e.g. by being passed to another function.
33648 In that case, unwrap both types so that we can compare the
33649 underlying records. */
33650 if (TREE_CODE (htype
) == ARRAY_TYPE
33651 || POINTER_TYPE_P (htype
))
33653 wtype
= TREE_TYPE (wtype
);
33654 htype
= TREE_TYPE (htype
);
33657 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
33658 return sysv_va_list_type_node
;
33659 wtype
= ms_va_list_type_node
;
33660 gcc_assert (wtype
!= NULL_TREE
);
33662 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
33664 /* If va_list is an array type, the argument may have decayed
33665 to a pointer type, e.g. by being passed to another function.
33666 In that case, unwrap both types so that we can compare the
33667 underlying records. */
33668 if (TREE_CODE (htype
) == ARRAY_TYPE
33669 || POINTER_TYPE_P (htype
))
33671 wtype
= TREE_TYPE (wtype
);
33672 htype
= TREE_TYPE (htype
);
33675 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
33676 return ms_va_list_type_node
;
33679 return std_canonical_va_list_type (type
);
33682 /* Iterate through the target-specific builtin types for va_list.
33683 IDX denotes the iterator, *PTREE is set to the result type of
33684 the va_list builtin, and *PNAME to its internal type.
33685 Returns zero if there is no element for this index, otherwise
33686 IDX should be increased upon the next call.
33687 Note, do not iterate a base builtin's name like __builtin_va_list.
33688 Used from c_common_nodes_and_builtins. */
33691 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
33701 *ptree
= ms_va_list_type_node
;
33702 *pname
= "__builtin_ms_va_list";
33706 *ptree
= sysv_va_list_type_node
;
33707 *pname
= "__builtin_sysv_va_list";
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch

/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
/* NOTE(review): value reconstructed from upstream GCC 4.6 — confirm.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1

#define BIG 100
33752 /* Dispatch groups. Istructions that affect the mix in a dispatch window. */
33753 enum dispatch_group
{
33768 /* Number of allowable groups in a dispatch window. It is an array
33769 indexed by dispatch_group enum. 100 is used as a big number,
33770 because the number of these kind of operations does not have any
33771 effect in dispatch window, but we need them for other reasons in
33773 static unsigned int num_allowable_groups
[disp_last
] = {
33774 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
33777 char group_name
[disp_last
+ 1][16] = {
33778 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
33779 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
33780 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
33783 /* Instruction path. */
33786 path_single
, /* Single micro op. */
33787 path_double
, /* Double micro op. */
33788 path_multi
, /* Instructions with more than 2 micro op.. */
33792 /* sched_insn_info defines a window to the instructions scheduled in
33793 the basic block. It contains a pointer to the insn_info table and
33794 the instruction scheduled.
33796 Windows are allocated for each basic block and are linked
33798 typedef struct sched_insn_info_s
{
33800 enum dispatch_group group
;
33801 enum insn_path path
;
33806 /* Linked list of dispatch windows. This is a two way list of
33807 dispatch windows of a basic block. It contains information about
33808 the number of uops in the window and the total number of
33809 instructions and of bytes in the object code for this dispatch
33811 typedef struct dispatch_windows_s
{
33812 int num_insn
; /* Number of insn in the window. */
33813 int num_uops
; /* Number of uops in the window. */
33814 int window_size
; /* Number of bytes in the window. */
33815 int window_num
; /* Window number between 0 or 1. */
33816 int num_imm
; /* Number of immediates in an insn. */
33817 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
33818 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
33819 int imm_size
; /* Total immediates in the window. */
33820 int num_loads
; /* Total memory loads in the window. */
33821 int num_stores
; /* Total memory stores in the window. */
33822 int violation
; /* Violation exists in window. */
33823 sched_insn_info
*window
; /* Pointer to the window. */
33824 struct dispatch_windows_s
*next
;
33825 struct dispatch_windows_s
*prev
;
33826 } dispatch_windows
;
33828 /* Immediate valuse used in an insn. */
33829 typedef struct imm_info_s
33836 static dispatch_windows
*dispatch_window_list
;
33837 static dispatch_windows
*dispatch_window_list1
;
33839 /* Get dispatch group of insn. */
33841 static enum dispatch_group
33842 get_mem_group (rtx insn
)
33844 enum attr_memory memory
;
33846 if (INSN_CODE (insn
) < 0)
33847 return disp_no_group
;
33848 memory
= get_attr_memory (insn
);
33849 if (memory
== MEMORY_STORE
)
33852 if (memory
== MEMORY_LOAD
)
33855 if (memory
== MEMORY_BOTH
)
33856 return disp_load_store
;
33858 return disp_no_group
;
33861 /* Return true if insn is a compare instruction. */
33866 enum attr_type type
;
33868 type
= get_attr_type (insn
);
33869 return (type
== TYPE_TEST
33870 || type
== TYPE_ICMP
33871 || type
== TYPE_FCMP
33872 || GET_CODE (PATTERN (insn
)) == COMPARE
);
33875 /* Return true if a dispatch violation encountered. */
33878 dispatch_violation (void)
33880 if (dispatch_window_list
->next
)
33881 return dispatch_window_list
->next
->violation
;
33882 return dispatch_window_list
->violation
;
33885 /* Return true if insn is a branch instruction. */
33888 is_branch (rtx insn
)
33890 return (CALL_P (insn
) || JUMP_P (insn
));
33893 /* Return true if insn is a prefetch instruction. */
33896 is_prefetch (rtx insn
)
33898 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
33901 /* This function initializes a dispatch window and the list container holding a
33902 pointer to the window. */
33905 init_window (int window_num
)
33908 dispatch_windows
*new_list
;
33910 if (window_num
== 0)
33911 new_list
= dispatch_window_list
;
33913 new_list
= dispatch_window_list1
;
33915 new_list
->num_insn
= 0;
33916 new_list
->num_uops
= 0;
33917 new_list
->window_size
= 0;
33918 new_list
->next
= NULL
;
33919 new_list
->prev
= NULL
;
33920 new_list
->window_num
= window_num
;
33921 new_list
->num_imm
= 0;
33922 new_list
->num_imm_32
= 0;
33923 new_list
->num_imm_64
= 0;
33924 new_list
->imm_size
= 0;
33925 new_list
->num_loads
= 0;
33926 new_list
->num_stores
= 0;
33927 new_list
->violation
= false;
33929 for (i
= 0; i
< MAX_INSN
; i
++)
33931 new_list
->window
[i
].insn
= NULL
;
33932 new_list
->window
[i
].group
= disp_no_group
;
33933 new_list
->window
[i
].path
= no_path
;
33934 new_list
->window
[i
].byte_len
= 0;
33935 new_list
->window
[i
].imm_bytes
= 0;
33940 /* This function allocates and initializes a dispatch window and the
33941 list container holding a pointer to the window. */
33943 static dispatch_windows
*
33944 allocate_window (void)
33946 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
33947 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
33952 /* This routine initializes the dispatch scheduling information. It
33953 initiates building dispatch scheduler tables and constructs the
33954 first dispatch window. */
33957 init_dispatch_sched (void)
33959 /* Allocate a dispatch list and a window. */
33960 dispatch_window_list
= allocate_window ();
33961 dispatch_window_list1
= allocate_window ();
33966 /* This function returns true if a branch is detected. End of a basic block
33967 does not have to be a branch, but here we assume only branches end a
33971 is_end_basic_block (enum dispatch_group group
)
33973 return group
== disp_branch
;
33976 /* This function is called when the end of a window processing is reached. */
33979 process_end_window (void)
33981 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
33982 if (dispatch_window_list
->next
)
33984 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
33985 gcc_assert (dispatch_window_list
->window_size
33986 + dispatch_window_list1
->window_size
<= 48);
33992 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
33993 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
33994 for 48 bytes of instructions. Note that these windows are not dispatch
33995 windows that their sizes are DISPATCH_WINDOW_SIZE. */
33997 static dispatch_windows
*
33998 allocate_next_window (int window_num
)
34000 if (window_num
== 0)
34002 if (dispatch_window_list
->next
)
34005 return dispatch_window_list
;
34008 dispatch_window_list
->next
= dispatch_window_list1
;
34009 dispatch_window_list1
->prev
= dispatch_window_list
;
34011 return dispatch_window_list1
;
34014 /* Increment the number of immediate operands of an instruction. */
34017 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
34022 switch ( GET_CODE (*in_rtx
))
34027 (imm_values
->imm
)++;
34028 if (x86_64_immediate_operand (*in_rtx
, SImode
))
34029 (imm_values
->imm32
)++;
34031 (imm_values
->imm64
)++;
34035 (imm_values
->imm
)++;
34036 (imm_values
->imm64
)++;
34040 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
34042 (imm_values
->imm
)++;
34043 (imm_values
->imm32
)++;
34054 /* Compute number of immediate operands of an instruction. */
34057 find_constant (rtx in_rtx
, imm_info
*imm_values
)
34059 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
34060 (rtx_function
) find_constant_1
, (void *) imm_values
);
34063 /* Return total size of immediate operands of an instruction along with number
34064 of corresponding immediate-operands. It initializes its parameters to zero
34065 befor calling FIND_CONSTANT.
34066 INSN is the input instruction. IMM is the total of immediates.
34067 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
34071 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
34073 imm_info imm_values
= {0, 0, 0};
34075 find_constant (insn
, &imm_values
);
34076 *imm
= imm_values
.imm
;
34077 *imm32
= imm_values
.imm32
;
34078 *imm64
= imm_values
.imm64
;
34079 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
34082 /* This function indicates if an operand of an instruction is an
34086 has_immediate (rtx insn
)
34088 int num_imm_operand
;
34089 int num_imm32_operand
;
34090 int num_imm64_operand
;
34093 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
34094 &num_imm64_operand
);
34098 /* Return single or double path for instructions. */
34100 static enum insn_path
34101 get_insn_path (rtx insn
)
34103 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
34105 if ((int)path
== 0)
34106 return path_single
;
34108 if ((int)path
== 1)
34109 return path_double
;
34114 /* Return insn dispatch group. */
34116 static enum dispatch_group
34117 get_insn_group (rtx insn
)
34119 enum dispatch_group group
= get_mem_group (insn
);
34123 if (is_branch (insn
))
34124 return disp_branch
;
34129 if (has_immediate (insn
))
34132 if (is_prefetch (insn
))
34133 return disp_prefetch
;
34135 return disp_no_group
;
34138 /* Count number of GROUP restricted instructions in a dispatch
34139 window WINDOW_LIST. */
34142 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
34144 enum dispatch_group group
= get_insn_group (insn
);
34146 int num_imm_operand
;
34147 int num_imm32_operand
;
34148 int num_imm64_operand
;
34150 if (group
== disp_no_group
)
34153 if (group
== disp_imm
)
34155 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
34156 &num_imm64_operand
);
34157 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
34158 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
34159 || (num_imm32_operand
> 0
34160 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
34161 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
34162 || (num_imm64_operand
> 0
34163 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
34164 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
34165 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
34166 && num_imm64_operand
> 0
34167 && ((window_list
->num_imm_64
> 0
34168 && window_list
->num_insn
>= 2)
34169 || window_list
->num_insn
>= 3)))
34175 if ((group
== disp_load_store
34176 && (window_list
->num_loads
>= MAX_LOAD
34177 || window_list
->num_stores
>= MAX_STORE
))
34178 || ((group
== disp_load
34179 || group
== disp_prefetch
)
34180 && window_list
->num_loads
>= MAX_LOAD
)
34181 || (group
== disp_store
34182 && window_list
->num_stores
>= MAX_STORE
))
34188 /* This function returns true if insn satisfies dispatch rules on the
34189 last window scheduled. */
34192 fits_dispatch_window (rtx insn
)
34194 dispatch_windows
*window_list
= dispatch_window_list
;
34195 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
34196 unsigned int num_restrict
;
34197 enum dispatch_group group
= get_insn_group (insn
);
34198 enum insn_path path
= get_insn_path (insn
);
34201 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
34202 instructions should be given the lowest priority in the
34203 scheduling process in Haifa scheduler to make sure they will be
34204 scheduled in the same dispatch window as the refrence to them. */
34205 if (group
== disp_jcc
|| group
== disp_cmp
)
34208 /* Check nonrestricted. */
34209 if (group
== disp_no_group
|| group
== disp_branch
)
34212 /* Get last dispatch window. */
34213 if (window_list_next
)
34214 window_list
= window_list_next
;
34216 if (window_list
->window_num
== 1)
34218 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
34221 || (min_insn_size (insn
) + sum
) >= 48)
34222 /* Window 1 is full. Go for next window. */
34226 num_restrict
= count_num_restricted (insn
, window_list
);
34228 if (num_restrict
> num_allowable_groups
[group
])
34231 /* See if it fits in the first window. */
34232 if (window_list
->window_num
== 0)
34234 /* The first widow should have only single and double path
34236 if (path
== path_double
34237 && (window_list
->num_uops
+ 2) > MAX_INSN
)
34239 else if (path
!= path_single
)
34245 /* Add an instruction INSN with NUM_UOPS micro-operations to the
34246 dispatch window WINDOW_LIST. */
34249 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
34251 int byte_len
= min_insn_size (insn
);
34252 int num_insn
= window_list
->num_insn
;
34254 sched_insn_info
*window
= window_list
->window
;
34255 enum dispatch_group group
= get_insn_group (insn
);
34256 enum insn_path path
= get_insn_path (insn
);
34257 int num_imm_operand
;
34258 int num_imm32_operand
;
34259 int num_imm64_operand
;
34261 if (!window_list
->violation
&& group
!= disp_cmp
34262 && !fits_dispatch_window (insn
))
34263 window_list
->violation
= true;
34265 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
34266 &num_imm64_operand
);
34268 /* Initialize window with new instruction. */
34269 window
[num_insn
].insn
= insn
;
34270 window
[num_insn
].byte_len
= byte_len
;
34271 window
[num_insn
].group
= group
;
34272 window
[num_insn
].path
= path
;
34273 window
[num_insn
].imm_bytes
= imm_size
;
34275 window_list
->window_size
+= byte_len
;
34276 window_list
->num_insn
= num_insn
+ 1;
34277 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
34278 window_list
->imm_size
+= imm_size
;
34279 window_list
->num_imm
+= num_imm_operand
;
34280 window_list
->num_imm_32
+= num_imm32_operand
;
34281 window_list
->num_imm_64
+= num_imm64_operand
;
34283 if (group
== disp_store
)
34284 window_list
->num_stores
+= 1;
34285 else if (group
== disp_load
34286 || group
== disp_prefetch
)
34287 window_list
->num_loads
+= 1;
34288 else if (group
== disp_load_store
)
34290 window_list
->num_stores
+= 1;
34291 window_list
->num_loads
+= 1;
34295 /* Adds a scheduled instruction, INSN, to the current dispatch window.
34296 If the total bytes of instructions or the number of instructions in
34297 the window exceed allowable, it allocates a new window. */
34300 add_to_dispatch_window (rtx insn
)
34303 dispatch_windows
*window_list
;
34304 dispatch_windows
*next_list
;
34305 dispatch_windows
*window0_list
;
34306 enum insn_path path
;
34307 enum dispatch_group insn_group
;
34315 if (INSN_CODE (insn
) < 0)
34318 byte_len
= min_insn_size (insn
);
34319 window_list
= dispatch_window_list
;
34320 next_list
= window_list
->next
;
34321 path
= get_insn_path (insn
);
34322 insn_group
= get_insn_group (insn
);
34324 /* Get the last dispatch window. */
34326 window_list
= dispatch_window_list
->next
;
34328 if (path
== path_single
)
34330 else if (path
== path_double
)
34333 insn_num_uops
= (int) path
;
34335 /* If current window is full, get a new window.
34336 Window number zero is full, if MAX_INSN uops are scheduled in it.
34337 Window number one is full, if window zero's bytes plus window
34338 one's bytes is 32, or if the bytes of the new instruction added
34339 to the total makes it greater than 48, or it has already MAX_INSN
34340 instructions in it. */
34341 num_insn
= window_list
->num_insn
;
34342 num_uops
= window_list
->num_uops
;
34343 window_num
= window_list
->window_num
;
34344 insn_fits
= fits_dispatch_window (insn
);
34346 if (num_insn
>= MAX_INSN
34347 || num_uops
+ insn_num_uops
> MAX_INSN
34350 window_num
= ~window_num
& 1;
34351 window_list
= allocate_next_window (window_num
);
34354 if (window_num
== 0)
34356 add_insn_window (insn
, window_list
, insn_num_uops
);
34357 if (window_list
->num_insn
>= MAX_INSN
34358 && insn_group
== disp_branch
)
34360 process_end_window ();
34364 else if (window_num
== 1)
34366 window0_list
= window_list
->prev
;
34367 sum
= window0_list
->window_size
+ window_list
->window_size
;
34369 || (byte_len
+ sum
) >= 48)
34371 process_end_window ();
34372 window_list
= dispatch_window_list
;
34375 add_insn_window (insn
, window_list
, insn_num_uops
);
34378 gcc_unreachable ();
34380 if (is_end_basic_block (insn_group
))
34382 /* End of basic block is reached do end-basic-block process. */
34383 process_end_window ();
34388 /* Print the dispatch window, WINDOW_NUM, to FILE. */
34390 DEBUG_FUNCTION
static void
34391 debug_dispatch_window_file (FILE *file
, int window_num
)
34393 dispatch_windows
*list
;
34396 if (window_num
== 0)
34397 list
= dispatch_window_list
;
34399 list
= dispatch_window_list1
;
34401 fprintf (file
, "Window #%d:\n", list
->window_num
);
34402 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
34403 list
->num_insn
, list
->num_uops
, list
->window_size
);
34404 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34405 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
34407 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
34409 fprintf (file
, " insn info:\n");
34411 for (i
= 0; i
< MAX_INSN
; i
++)
34413 if (!list
->window
[i
].insn
)
34415 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
34416 i
, group_name
[list
->window
[i
].group
],
34417 i
, (void *)list
->window
[i
].insn
,
34418 i
, list
->window
[i
].path
,
34419 i
, list
->window
[i
].byte_len
,
34420 i
, list
->window
[i
].imm_bytes
);
34424 /* Print to stdout a dispatch window. */
34426 DEBUG_FUNCTION
void
34427 debug_dispatch_window (int window_num
)
34429 debug_dispatch_window_file (stdout
, window_num
);
34432 /* Print INSN dispatch information to FILE. */
34434 DEBUG_FUNCTION
static void
34435 debug_insn_dispatch_info_file (FILE *file
, rtx insn
)
34438 enum insn_path path
;
34439 enum dispatch_group group
;
34441 int num_imm_operand
;
34442 int num_imm32_operand
;
34443 int num_imm64_operand
;
34445 if (INSN_CODE (insn
) < 0)
34448 byte_len
= min_insn_size (insn
);
34449 path
= get_insn_path (insn
);
34450 group
= get_insn_group (insn
);
34451 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
34452 &num_imm64_operand
);
34454 fprintf (file
, " insn info:\n");
34455 fprintf (file
, " group = %s, path = %d, byte_len = %d\n",
34456 group_name
[group
], path
, byte_len
);
34457 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34458 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
34461 /* Print to STDERR the status of the ready list with respect to
34462 dispatch windows. */
34464 DEBUG_FUNCTION
void
34465 debug_ready_dispatch (void)
34468 int no_ready
= number_in_ready ();
34470 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
34472 for (i
= 0; i
< no_ready
; i
++)
34473 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
34476 /* This routine is the driver of the dispatch scheduler. */
34479 do_dispatch (rtx insn
, int mode
)
34481 if (mode
== DISPATCH_INIT
)
34482 init_dispatch_sched ();
34483 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
34484 add_to_dispatch_window (insn
);
34487 /* Return TRUE if Dispatch Scheduling is supported. */
34490 has_dispatch (rtx insn
, int action
)
34492 if (ix86_tune
== PROCESSOR_BDVER1
&& flag_dispatch_scheduler
)
34498 case IS_DISPATCH_ON
:
34503 return is_cmp (insn
);
34505 case DISPATCH_VIOLATION
:
34506 return dispatch_violation ();
34508 case FITS_DISPATCH_WINDOW
:
34509 return fits_dispatch_window (insn
);
34515 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
34516 place emms and femms instructions. */
34518 static enum machine_mode
34519 ix86_preferred_simd_mode (enum machine_mode mode
)
34521 /* Disable double precision vectorizer if needed. */
34522 if (mode
== DFmode
&& !TARGET_VECTORIZE_DOUBLE
)
34525 if (!TARGET_AVX
&& !TARGET_SSE
)
34531 return TARGET_AVX
? V8SFmode
: V4SFmode
;
34533 return TARGET_AVX
? V4DFmode
: V2DFmode
;
34549 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
34552 static unsigned int
34553 ix86_autovectorize_vector_sizes (void)
34555 return TARGET_AVX
? 32 | 16 : 0;
34558 /* Initialize the GCC target structure. */
34559 #undef TARGET_RETURN_IN_MEMORY
34560 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
34562 #undef TARGET_LEGITIMIZE_ADDRESS
34563 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
34565 #undef TARGET_ATTRIBUTE_TABLE
34566 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
34567 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34568 # undef TARGET_MERGE_DECL_ATTRIBUTES
34569 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
34572 #undef TARGET_COMP_TYPE_ATTRIBUTES
34573 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
34575 #undef TARGET_INIT_BUILTINS
34576 #define TARGET_INIT_BUILTINS ix86_init_builtins
34577 #undef TARGET_BUILTIN_DECL
34578 #define TARGET_BUILTIN_DECL ix86_builtin_decl
34579 #undef TARGET_EXPAND_BUILTIN
34580 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
34582 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
34583 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
34584 ix86_builtin_vectorized_function
34586 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
34587 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
34589 #undef TARGET_BUILTIN_RECIPROCAL
34590 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
34592 #undef TARGET_ASM_FUNCTION_EPILOGUE
34593 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
34595 #undef TARGET_ENCODE_SECTION_INFO
34596 #ifndef SUBTARGET_ENCODE_SECTION_INFO
34597 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
34599 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
34602 #undef TARGET_ASM_OPEN_PAREN
34603 #define TARGET_ASM_OPEN_PAREN ""
34604 #undef TARGET_ASM_CLOSE_PAREN
34605 #define TARGET_ASM_CLOSE_PAREN ""
34607 #undef TARGET_ASM_BYTE_OP
34608 #define TARGET_ASM_BYTE_OP ASM_BYTE
34610 #undef TARGET_ASM_ALIGNED_HI_OP
34611 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
34612 #undef TARGET_ASM_ALIGNED_SI_OP
34613 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
34615 #undef TARGET_ASM_ALIGNED_DI_OP
34616 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
34619 #undef TARGET_PROFILE_BEFORE_PROLOGUE
34620 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
34622 #undef TARGET_ASM_UNALIGNED_HI_OP
34623 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
34624 #undef TARGET_ASM_UNALIGNED_SI_OP
34625 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
34626 #undef TARGET_ASM_UNALIGNED_DI_OP
34627 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
34629 #undef TARGET_PRINT_OPERAND
34630 #define TARGET_PRINT_OPERAND ix86_print_operand
34631 #undef TARGET_PRINT_OPERAND_ADDRESS
34632 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
34633 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
34634 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
34635 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
34636 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
34638 #undef TARGET_SCHED_INIT_GLOBAL
34639 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
34640 #undef TARGET_SCHED_ADJUST_COST
34641 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
34642 #undef TARGET_SCHED_ISSUE_RATE
34643 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
34644 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
34645 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
34646 ia32_multipass_dfa_lookahead
34648 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
34649 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
34652 #undef TARGET_HAVE_TLS
34653 #define TARGET_HAVE_TLS true
34655 #undef TARGET_CANNOT_FORCE_CONST_MEM
34656 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
34657 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
34658 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
34660 #undef TARGET_DELEGITIMIZE_ADDRESS
34661 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
34663 #undef TARGET_MS_BITFIELD_LAYOUT_P
34664 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
34667 #undef TARGET_BINDS_LOCAL_P
34668 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
34670 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34671 #undef TARGET_BINDS_LOCAL_P
34672 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
34675 #undef TARGET_ASM_OUTPUT_MI_THUNK
34676 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
34677 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
34678 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
34680 #undef TARGET_ASM_FILE_START
34681 #define TARGET_ASM_FILE_START x86_file_start
34683 #undef TARGET_DEFAULT_TARGET_FLAGS
34684 #define TARGET_DEFAULT_TARGET_FLAGS \
34686 | TARGET_SUBTARGET_DEFAULT \
34687 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
34689 #undef TARGET_HANDLE_OPTION
34690 #define TARGET_HANDLE_OPTION ix86_handle_option
34692 #undef TARGET_OPTION_OVERRIDE
34693 #define TARGET_OPTION_OVERRIDE ix86_option_override
34694 #undef TARGET_OPTION_OPTIMIZATION_TABLE
34695 #define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
34696 #undef TARGET_OPTION_INIT_STRUCT
34697 #define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
34699 #undef TARGET_REGISTER_MOVE_COST
34700 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
34701 #undef TARGET_MEMORY_MOVE_COST
34702 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
34703 #undef TARGET_RTX_COSTS
34704 #define TARGET_RTX_COSTS ix86_rtx_costs
34705 #undef TARGET_ADDRESS_COST
34706 #define TARGET_ADDRESS_COST ix86_address_cost
34708 #undef TARGET_FIXED_CONDITION_CODE_REGS
34709 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
34710 #undef TARGET_CC_MODES_COMPATIBLE
34711 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
34713 #undef TARGET_MACHINE_DEPENDENT_REORG
34714 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
34716 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
34717 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
34719 #undef TARGET_BUILD_BUILTIN_VA_LIST
34720 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
34722 #undef TARGET_ENUM_VA_LIST_P
34723 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
34725 #undef TARGET_FN_ABI_VA_LIST
34726 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
34728 #undef TARGET_CANONICAL_VA_LIST_TYPE
34729 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
34731 #undef TARGET_EXPAND_BUILTIN_VA_START
34732 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
34734 #undef TARGET_MD_ASM_CLOBBERS
34735 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
34737 #undef TARGET_PROMOTE_PROTOTYPES
34738 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
34739 #undef TARGET_STRUCT_VALUE_RTX
34740 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
34741 #undef TARGET_SETUP_INCOMING_VARARGS
34742 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
34743 #undef TARGET_MUST_PASS_IN_STACK
34744 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
34745 #undef TARGET_FUNCTION_ARG_ADVANCE
34746 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
34747 #undef TARGET_FUNCTION_ARG
34748 #define TARGET_FUNCTION_ARG ix86_function_arg
34749 #undef TARGET_FUNCTION_ARG_BOUNDARY
34750 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
34751 #undef TARGET_PASS_BY_REFERENCE
34752 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
34753 #undef TARGET_INTERNAL_ARG_POINTER
34754 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
34755 #undef TARGET_UPDATE_STACK_BOUNDARY
34756 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
34757 #undef TARGET_GET_DRAP_RTX
34758 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
34759 #undef TARGET_STRICT_ARGUMENT_NAMING
34760 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
34761 #undef TARGET_STATIC_CHAIN
34762 #define TARGET_STATIC_CHAIN ix86_static_chain
34763 #undef TARGET_TRAMPOLINE_INIT
34764 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
34765 #undef TARGET_RETURN_POPS_ARGS
34766 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
34768 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
34769 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
34771 #undef TARGET_SCALAR_MODE_SUPPORTED_P
34772 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
34774 #undef TARGET_VECTOR_MODE_SUPPORTED_P
34775 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
34777 #undef TARGET_C_MODE_FOR_SUFFIX
34778 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
34781 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
34782 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
34785 #ifdef SUBTARGET_INSERT_ATTRIBUTES
34786 #undef TARGET_INSERT_ATTRIBUTES
34787 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
34790 #undef TARGET_MANGLE_TYPE
34791 #define TARGET_MANGLE_TYPE ix86_mangle_type
34793 #undef TARGET_STACK_PROTECT_FAIL
34794 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
34796 #undef TARGET_SUPPORTS_SPLIT_STACK
34797 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
34799 #undef TARGET_FUNCTION_VALUE
34800 #define TARGET_FUNCTION_VALUE ix86_function_value
34802 #undef TARGET_FUNCTION_VALUE_REGNO_P
34803 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
34805 #undef TARGET_SECONDARY_RELOAD
34806 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
34808 #undef TARGET_PREFERRED_RELOAD_CLASS
34809 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
34810 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
34811 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
34812 #undef TARGET_CLASS_LIKELY_SPILLED_P
34813 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
34815 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
34816 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
34817 ix86_builtin_vectorization_cost
34818 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
34819 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
34820 ix86_vectorize_builtin_vec_perm
34821 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
34822 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
34823 ix86_vectorize_builtin_vec_perm_ok
34824 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
34825 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
34826 ix86_preferred_simd_mode
34827 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
34828 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
34829 ix86_autovectorize_vector_sizes
34831 #undef TARGET_SET_CURRENT_FUNCTION
34832 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
34834 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
34835 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
34837 #undef TARGET_OPTION_SAVE
34838 #define TARGET_OPTION_SAVE ix86_function_specific_save
34840 #undef TARGET_OPTION_RESTORE
34841 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
34843 #undef TARGET_OPTION_PRINT
34844 #define TARGET_OPTION_PRINT ix86_function_specific_print
34846 #undef TARGET_CAN_INLINE_P
34847 #define TARGET_CAN_INLINE_P ix86_can_inline_p
34849 #undef TARGET_EXPAND_TO_RTL_HOOK
34850 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
34852 #undef TARGET_LEGITIMATE_ADDRESS_P
34853 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
34855 #undef TARGET_IRA_COVER_CLASSES
34856 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
34858 #undef TARGET_FRAME_POINTER_REQUIRED
34859 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
34861 #undef TARGET_CAN_ELIMINATE
34862 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
34864 #undef TARGET_EXTRA_LIVE_ON_ENTRY
34865 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
34867 #undef TARGET_ASM_CODE_END
34868 #define TARGET_ASM_CODE_END ix86_code_end
34870 #undef TARGET_CONDITIONAL_REGISTER_USAGE
34871 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
34873 struct gcc_target targetm
= TARGET_INITIALIZER
;
34875 #include "gt-i386.h"